mrdaemon · December 12, 2014 07:23
diff --git a/parser.py b/parser.py
 def tokenize(string):
    """Split a reminder request into rough recipient/time/message tokens.

    Three independent regular expressions are searched against ``string``:

    * recipient: everything from the start of the string up to the first
      standalone "in", "to" or "about";
    * time: the text following "in <digits> ", up to a following
      "to"/"about" or the end of the string;
    * message: the text following the first "to"/"about", up to a
      following time clause ("in <digits> ...") or the end of the string.

    The time and message clauses may therefore come in either order.
    Matching is case-insensitive.

    Returns a dict with three keys: "recipient" (list of strings obtained
    by splitting the recipient text on "," / "and"), "time" (list of
    whitespace-separated strings) and "message" (a single string).

    Raises ReminderSyntaxError when any of the three parts is missing.
    """
    # Regular expressions to extract blocks
    recipient_re = re.compile(r'^(.+?)\s(in|to|about)\b', re.IGNORECASE)
    time_re = re.compile(r'\bin\s(\d+\s\b.+?\b)(?:\sto\b|\sabout\b|$)',
                         re.IGNORECASE)
    # The message only ends where an actual time clause ("in <digits> ")
    # begins. Stopping at any bare "in" would truncate messages such as
    # "call in sick" or "log in" down to their first word(s).
    message_re = re.compile(r'\b(?:to|about)\s(.+?)(?:\sin\s\d+\s|$)',
                            re.IGNORECASE)

    recipientmatch = recipient_re.search(string)
    timematch = time_re.search(string)
    messagematch = message_re.search(string)

    # Checks are ordered who / when / what, so the first missing part
    # decides which error message is reported.
    if recipientmatch is None:
        raise ReminderSyntaxError("Remind who?")

    if timematch is None:
        raise ReminderSyntaxError("Remind when?")

    if messagematch is None:
        raise ReminderSyntaxError("Remind what..?")

    # Fetch groups, process further and pack as tokens

    # Construct rough, unvalidated list of usernames, using
    # "and" and "," as delimiters to split on. The ", and" alternative
    # is listed first so that it is consumed as a single delimiter
    # instead of leaving an empty element between "," and "and".
    recipientdelims = re.compile(r'\b,\sand\b\s|\b,\s|\s\band\b\s',
                                 re.IGNORECASE)
    recipientblock = recipientdelims.split(recipientmatch.group(1))

    # Time is just split in unvalidated elements. Should contain two,
    # the numerical value and the human readable time unit.
    timeblock = timematch.group(1).split()

    # Message block is just kept as-is, as a string.
    messageblock = messagematch.group(1)

    return {"recipient": recipientblock,
            "time": timeblock,
            "message": messageblock}
	def tokenize(string):
	    """Split a reminder request into rough recipient/time/message tokens.

	    Three independent regular expressions are searched against ``string``:

	    * recipient: everything from the start of the string up to the first
	      standalone "in", "to" or "about";
	    * time: the text following "in <digits> ", up to a following
	      "to"/"about" or the end of the string;
	    * message: the text following the first "to"/"about", up to a
	      following time clause ("in <digits> ...") or the end of the string.

	    The time and message clauses may therefore come in either order.
	    Matching is case-insensitive.

	    Returns a dict with three keys: "recipient" (list of strings obtained
	    by splitting the recipient text on "," / "and"), "time" (list of
	    whitespace-separated strings) and "message" (a single string).

	    Raises ReminderSyntaxError when any of the three parts is missing.
	    """
	    # Regular expressions to extract blocks. The alternations use a
	    # plain "|": inside a raw string an escaped pipe would match a
	    # literal "|" character instead of separating alternatives.
	    recipient_re = re.compile(r'^(.+?)\s(in|to|about)\b', re.IGNORECASE)
	    time_re = re.compile(r'\bin\s(\d+\s\b.+?\b)(?:\sto\b|\sabout\b|$)',
	                         re.IGNORECASE)
	    # The message only ends where an actual time clause ("in <digits> ")
	    # begins. Stopping at any bare "in" would truncate messages such as
	    # "call in sick" or "log in" down to their first word(s).
	    message_re = re.compile(r'\b(?:to|about)\s(.+?)(?:\sin\s\d+\s|$)',
	                            re.IGNORECASE)

	    recipientmatch = recipient_re.search(string)
	    timematch = time_re.search(string)
	    messagematch = message_re.search(string)

	    # Checks are ordered who / when / what, so the first missing part
	    # decides which error message is reported.
	    if recipientmatch is None:
	        raise ReminderSyntaxError("Remind who?")

	    if timematch is None:
	        raise ReminderSyntaxError("Remind when?")

	    if messagematch is None:
	        raise ReminderSyntaxError("Remind what..?")

	    # Fetch groups, process further and pack as tokens

	    # Construct rough, unvalidated list of usernames, using
	    # "and" and "," as delimiters to split on. The ", and" alternative
	    # is listed first so that it is consumed as a single delimiter
	    # instead of leaving an empty element between "," and "and".
	    recipientdelims = re.compile(r'\b,\sand\b\s|\b,\s|\s\band\b\s',
	                                 re.IGNORECASE)
	    recipientblock = recipientdelims.split(recipientmatch.group(1))

	    # Time is just split in unvalidated elements. Should contain two,
	    # the numerical value and the human readable time unit.
	    timeblock = timematch.group(1).split()

	    # Message block is just kept as-is, as a string.
	    messageblock = messagematch.group(1)

	    return {"recipient": recipientblock,
	            "time": timeblock,
	            "message": messageblock}