Last active
November 7, 2016 19:32
-
-
Save radzhome/58ffc9bf0fb626cbe7a0c054079597bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from collections import OrderedDict | |
# Lookup tables mapping lower-case English number words to the text that
# replaces them.  Ordinal words map to a numeral plus suffix ('3RD'),
# cardinal words map to the bare numeral ('3').

# Ordinals 1st-9th.
ONETHS = {
    'first': '1ST', 'second': '2ND', 'third': '3RD', 'fourth': '4TH',
    'fifth': '5TH', 'sixth': '6TH', 'seventh': '7TH', 'eighth': '8TH',
    'ninth': '9TH',
}

# Ordinals 10th-19th.
TEENTHS = {
    'tenth': '10TH', 'eleventh': '11TH', 'twelfth': '12TH',
    'thirteenth': '13TH', 'fourteenth': '14TH', 'fifteenth': '15TH',
    'sixteenth': '16TH', 'seventeenth': '17TH', 'eighteenth': '18TH',
    'nineteenth': '19TH',
}

# Ordinal multiples of ten, 20th-90th.
TENTHS = {
    'twentieth': '20TH', 'thirtieth': '30TH', 'fortieth': '40TH',
    'fiftieth': '50TH', 'sixtieth': '60TH', 'seventieth': '70TH',
    'eightieth': '80TH', 'ninetieth': '90TH',
}

# Holds a single entry, hence the singular name (presumably what the
# original "not s" remark meant).
HUNDREDTH = {'hundredth': '100TH'}

# Cardinals 1-9.
ONES = {
    'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
    'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
}

# Cardinal multiples of ten, 20-90.
TENS = {
    'twenty': '20', 'thirty': '30', 'forty': '40', 'fifty': '50',
    'sixty': '60', 'seventy': '70', 'eighty': '80', 'ninety': '90',
}

HUNDRED = {'hundred': '100'}

# Every ordinal ("-th") word in one table.
ALL_THS = {}
ALL_THS.update(ONETHS)
ALL_THS.update(TEENTHS)
ALL_THS.update(TENTHS)
ALL_THS.update(HUNDREDTH)

# Every recognised number word.  The order of the groups matters because
# split_ordinal_word() tries the keys in iteration order with
# str.startswith(): the ordinals come first, then the tens, 'hundred' and
# finally the ones, so a longer word is always tried before a shorter word
# that is a prefix of it ('sixth' and 'sixty' before 'six', 'hundredth'
# before 'hundred').  No key is a prefix of another key in the same group,
# so the order inside a group does not matter.
ALL_ORDINALS = OrderedDict()
ALL_ORDINALS.update(ALL_THS)
ALL_ORDINALS.update(TENS)
ALL_ORDINALS.update(HUNDRED)
ALL_ORDINALS.update(ONES)
def split_ordinal_word(word):
    """Split one token into the number words it is made of.

    Handles run-together forms such as 'fortythird' -> ['forty', 'third']
    as well as plain words such as 'sixth' -> ['sixth'].

    The token only counts as a number when it can be decomposed completely
    into keys of ALL_ORDINALS.  A word that merely starts with a number
    word ('fourier', 'oneida') is not a number and yields an empty list
    instead of a partial match.  Characters that are neither letters nor
    digits at the end of the token (e.g. the comma in 'third,') are
    tolerated and dropped.

    :param word: a single lower-case token
    :return: list of ALL_ORDINALS keys in the order they occur, or an
        empty list when the token is empty or not a number word
    """
    if not word:
        return []

    for key in ALL_ORDINALS:
        if not word.startswith(key):
            continue

        rest = word[len(key):]
        if not any(ch.isalnum() for ch in rest):
            # Nothing but (optional) punctuation is left: fully matched.
            return [key]

        tail = split_ordinal_word(rest)
        if tail:
            return [key] + tail
        # The remainder is not a number word; fall through and try the
        # remaining keys rather than accepting a partial match.

    return []
def get_ordinals(s):
    """Split a string into leading text, a run of number words and the rest.

    The string is lower-cased, hyphens are treated as word separators and
    the result is split on any whitespace.  Tokens are then assigned, in
    order, to one of three lists:

    * ``start``    - tokens seen before the first number word
    * ``ordinals`` - the number words of the first contiguous run, as
      returned by split_ordinal_word()
    * ``end``      - every token after that run, including later number
      words, so that two unrelated numbers are never merged into one

    The word 'and' is dropped only where it acts as a connector inside the
    number ('one hundred and third').  Anywhere else it is kept, and it is
    never removed from inside another word ('sandy', 'grand').

    :param s: free text, e.g. '32 one hundred and forty-third st'
    :return: tuple ``(start, ordinals, end)`` of lists of lower-case tokens
    """
    start, ordinals, end = [], [], []

    # str.split() without arguments splits on any run of whitespace and
    # never yields empty tokens, so blank input gives three empty lists.
    words = s.lower().replace('-', ' ').split()

    for index, word in enumerate(words):
        if end:
            # The number run is already over; everything else is trailing
            # text.
            end.append(word)
            continue

        if word == 'and' and ordinals:
            following = words[index + 1] if index + 1 < len(words) else ''
            if split_ordinal_word(following):
                # Connector between two parts of the same number.
                continue

        found_ordinals = split_ordinal_word(word)
        if found_ordinals:
            ordinals += found_ordinals
        elif ordinals:
            end.append(word)
        else:
            start.append(word)

    return start, ordinals, end
def detect_ordinal_pattern(ordinals):
    """Convert a sequence of number words into numeral form, e.g. '143RD'.

    Recognised shapes (the last word must be an ordinal):

    * a single word: its ALL_ORDINALS value ('third' -> '3RD', 'six' -> '6')
    * ``<ones> hundredth``                      -> '300TH'
    * ``[<ones>] hundred <ordinal below 100>``  -> '103RD', '913TH', '120TH'
    * ``[[<ones>] hundred] <tens> <1st-9th>``   -> '21ST', '131ST', '923RD'

    The value is computed arithmetically, so 'one hundred third' gives
    '103RD' (the zero is not lost).

    :param ordinals: list of ALL_ORDINALS keys, typically the ``ordinals``
        element returned by get_ordinals()
    :return: the numeral with its suffix, or '' when the list is empty or
        the words do not form one of the shapes above
    """
    if not ordinals:
        return ''

    if len(ordinals) == 1:
        # A lone word may be a cardinal as well as an ordinal.
        return ALL_ORDINALS.get(ordinals[0], '')

    prefix = list(ordinals[:-1])
    last = ordinals[-1]
    if last not in ALL_THS:
        return ''

    if last in HUNDREDTH:
        # Only "<one..nine> hundredth" is meaningful here.
        if len(prefix) == 1 and prefix[0] in ONES:
            return ONES[prefix[0]] + '00TH'
        return ''

    total = 0

    # Optional hundreds part: "<ones> hundred" or a bare "hundred" (= 100).
    if len(prefix) >= 2 and prefix[0] in ONES and prefix[1] in HUNDRED:
        total += int(ONES[prefix[0]]) * 100
        prefix = prefix[2:]
    elif prefix and prefix[0] in HUNDRED:
        total += 100
        prefix = prefix[1:]

    # A tens word may only precede a 1st-9th ordinal ('twenty third');
    # 'twenty thirteenth' or 'twenty thirtieth' is not a number.
    if prefix and prefix[0] in TENS and last in ONETHS:
        total += int(TENS[prefix[0]])
        prefix = prefix[1:]

    if prefix:
        # Words are left over that fit no recognised shape.
        return ''

    numeral = ALL_THS[last]  # e.g. '13TH': digits followed by a 2-letter suffix
    return str(total + int(numeral[:-2])) + numeral[-2:]
# Manual smoke test.  Other inputs worth trying in place of `s`:
#   '32 one hundred and forty-third st toronto, on'
#   '32 forty-third st toronto, on'
#   '32 one-hundredth st toronto, on'
#   '32 hundred and third st toronto, on'
#   '32 hundred and thirty first st toronto, on'
#   '32 nine hundred and twenty third st toronto, on'
#   '32 nine hundred and ninety ninth st toronto, on'
if __name__ == '__main__':
    # Guarded so that importing this module does not print anything.
    s = '32 sixty sixth toronto, on'
    st, ords, en = get_ordinals(s)
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(ords)
    print('%s %s %s' % (st, detect_ordinal_pattern(ords), en))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment