radzhome · November 7, 2016 19:32
diff --git a/street name ordinals b/street name ordinals
 import re
 from collections import OrderedDict

 # Ordinal words for 1-9 and their numeric spelling.
 ONETHS = {
    'first': '1ST',
    'second': '2ND',
    'third': '3RD',
    'fourth': '4TH',
    'fifth': '5TH',
    'sixth': '6TH',
    'seventh': '7TH',
    'eighth': '8TH',
    'ninth': '9TH',
 }

 # Ordinal words for 10-19.
 TEENTHS = {
    'tenth': '10TH',
    'eleventh': '11TH',
    'twelfth': '12TH',
    'thirteenth': '13TH',
    'fourteenth': '14TH',
    'fifteenth': '15TH',
    'sixteenth': '16TH',
    'seventeenth': '17TH',
    'eighteenth': '18TH',
    'nineteenth': '19TH',
 }

 # Ordinal words for the round tens 20-90.
 TENTHS = {
    'twentieth': '20TH',
    'thirtieth': '30TH',
    'fortieth': '40TH',
    'fiftieth': '50TH',
    'sixtieth': '60TH',
    'seventieth': '70TH',
    'eightieth': '80TH',
    'ninetieth': '90TH',
 }

 # Singular on purpose: 'hundredth' is the only entry.
 HUNDREDTH = {'hundredth': '100TH'}

 # Cardinal words for 1-9.
 ONES = {
    'one': '1',
    'two': '2',
    'three': '3',
    'four': '4',
    'five': '5',
    'six': '6',
    'seven': '7',
    'eight': '8',
    'nine': '9',
 }

 # Cardinal words for the round tens 20-90.
 TENS = {
    'twenty': '20',
    'thirty': '30',
    'forty': '40',
    'fifty': '50',
    'sixty': '60',
    'seventy': '70',
    'eighty': '80',
    'ninety': '90',
 }

 HUNDRED = {'hundred': '100'}

 # Every ordinal ("-th") word in one table.
 ALL_THS = dict(
    list(ONETHS.items())
    + list(TEENTHS.items())
    + list(TENTHS.items())
    + list(HUNDREDTH.items())
 )

 # Every recognised number word.  The group order is significant for prefix
 # matching: ordinal words first, then tens, then 'hundred', then ones, so
 # longer words ('sixth', 'sixty', 'hundredth') are seen before the shorter
 # words they start with ('six', 'hundred').
 ALL_ORDINALS = OrderedDict(
    list(ALL_THS.items())
    + list(TENS.items())
    + list(HUNDRED.items())
    + list(ONES.items())
 )


 def split_ordinal_word(word):
    """Break ``word`` into the number words from ``ALL_ORDINALS`` that spell it.

    Handles single words ('third') as well as fused words
    ('fortythird' -> ['forty', 'third']).  The whole word has to be made of
    known number words; otherwise an empty list is returned, so ordinary
    words that merely begin with a number word ('oneida', 'sixteen') are not
    mistaken for numbers.

    :param word: a single token, expected in lower case.  Trailing
        ``,.;:!?`` characters are ignored.
    :return: list of ``ALL_ORDINALS`` keys in the order they occur in
        ``word``, or ``[]`` when the word is not a number word.
    """
    if not word:
        return []
    # Tokens often carry trailing punctuation ('third,'); ignore it so the
    # full-match requirement below does not reject them.
    word = word.rstrip(',.;:!?')
    if not word:
        return []
    for key in ALL_ORDINALS:
        if not word.startswith(key):
            continue
        remainder = word[len(key):]
        if not remainder:
            return [key]
        # Only accept this prefix if the rest of the word is number words
        # too; otherwise fall through and try the next candidate key.
        tail = split_ordinal_word(remainder)
        if tail:
            return [key] + tail
    return []

 def get_ordinals(s):
    """Split an address-like string around its first run of number words.

    The text is lower-cased, hyphens are treated as spaces and the
    stand-alone connector word 'and' is dropped, so
    'One hundred and forty-third' yields the number words
    ['one', 'hundred', 'forty', 'third'].

    :param s: the text to scan, e.g. '32 sixty sixth toronto, on'.
    :return: tuple ``(start, ordinals, end)`` of lists: the tokens before
        the number words, the number words themselves (as returned by
        ``split_ordinal_word``), and the tokens after them.
    """
    start, ordinals, end = [], [], []
    # Lower-case before looking for 'and' so any capitalisation is caught,
    # and compare whole tokens so words such as 'portland' stay intact.
    # split() without arguments collapses every run of whitespace and gives
    # [] for blank input, so no empty tokens reach the loop.
    tokens = [tok for tok in s.lower().replace('-', ' ').split() if tok != 'and']
    run_finished = False
    for token in tokens:
        found_ordinals = [] if run_finished else split_ordinal_word(token)
        if found_ordinals:
            ordinals += found_ordinals
        elif ordinals:
            # The first plain word after the number words closes the run;
            # later number words (e.g. a unit number) stay in ``end``.
            run_finished = True
            end.append(token)
        else:
            start.append(token)
    return start, ordinals, end
        
    
 def detect_ordinal_pattern(ordinals):
    """Turn a list of number words into its numeric spelling, e.g. '143RD'.

    Recognised shapes (``n`` is a word from ONES):

    * a single known word                   -> its table value ('sixty' -> '60')
    * n 'hundredth'                         -> 'n00TH'
    * tens + ones-ordinal                   -> '43RD'
    * ['n'] 'hundred' + ordinal below 100   -> '103RD', '110TH', '120TH'
    * ['n'] 'hundred' + tens + ones-ordinal -> '143RD'

    :param ordinals: sequence of lower-case number words, as produced by
        ``get_ordinals``.
    :return: the numeric string, or ``''`` when the words do not form one of
        the shapes above.
    """
    count = len(ordinals)
    if count == 1:
        # A lone word maps straight through the lookup table; unknown words
        # give '' like every other unrecognised input.
        return ALL_ORDINALS.get(ordinals[0], '')
    if count < 2 or count > 4:
        return ''

    head, last = list(ordinals[:-1]), ordinals[-1]

    # 'n hundredth' -> 'n00TH'
    if len(head) == 1 and head[0] in ONES and last in HUNDREDTH:
        return ONES[head[0]] + '00TH'

    # Peel off an optional hundreds prefix: bare 'hundred' or 'n hundred'.
    hundreds = ''
    if head[0] in HUNDRED:
        hundreds, head = '1', head[1:]
    elif len(head) >= 2 and head[0] in ONES and head[1] in HUNDRED:
        hundreds, head = ONES[head[0]], head[2:]

    if not head:
        # Hundreds followed directly by the ordinal.  A ones-ordinal needs a
        # zero in the tens place ('one hundred third' -> '103RD', not '13RD');
        # teen and tens ordinals already carry two digits.
        if last in ONETHS:
            return hundreds + '0' + ONETHS[last]
        if last in TEENTHS or last in TENTHS:
            return hundreds + ALL_THS[last]
        return ''

    # [hundreds] tens ones-ordinal: keep only the tens digit of e.g. '40'.
    if len(head) == 1 and head[0] in TENS and last in ONETHS:
        return hundreds + TENS[head[0]][0] + ONETHS[last]
    return ''


 # Sample inputs; assign one of these to ``s`` to try it.
 # s = '32 one   hundred and forty-third st toronto, on'
 # s = '32 forty-third st toronto, on'
 # s = '32 one-hundredth st toronto, on'
 # s = '32 hundred and third st toronto, on'
 # s = '32 hundred and thirty first st toronto, on'
 # s = '32 nine hundred and twenty third st toronto, on'
 # s = '32 nine hundred and ninety ninth st toronto, on'
 s = '32 sixty sixth toronto, on'

 st, ords, en = get_ordinals(s)
 # Single-argument, parenthesised print calls produce the same output under
 # the Python 2 print statement and the Python 3 print function.
 print(ords)
 print('%s %s %s' % (st, detect_ordinal_pattern(ords), en))
	import re
	from collections import OrderedDict

	# Ordinal words for 1-9 and their numeric spelling.
	ONETHS = {
	    'first': '1ST',
	    'second': '2ND',
	    'third': '3RD',
	    'fourth': '4TH',
	    'fifth': '5TH',
	    'sixth': '6TH',
	    'seventh': '7TH',
	    'eighth': '8TH',
	    'ninth': '9TH',
	}

	# Ordinal words for 10-19.
	TEENTHS = {
	    'tenth': '10TH',
	    'eleventh': '11TH',
	    'twelfth': '12TH',
	    'thirteenth': '13TH',
	    'fourteenth': '14TH',
	    'fifteenth': '15TH',
	    'sixteenth': '16TH',
	    'seventeenth': '17TH',
	    'eighteenth': '18TH',
	    'nineteenth': '19TH',
	}

	# Ordinal words for the round tens 20-90.
	TENTHS = {
	    'twentieth': '20TH',
	    'thirtieth': '30TH',
	    'fortieth': '40TH',
	    'fiftieth': '50TH',
	    'sixtieth': '60TH',
	    'seventieth': '70TH',
	    'eightieth': '80TH',
	    'ninetieth': '90TH',
	}

	# Singular on purpose: 'hundredth' is the only entry.
	HUNDREDTH = {'hundredth': '100TH'}

	# Cardinal words for 1-9.
	ONES = {
	    'one': '1',
	    'two': '2',
	    'three': '3',
	    'four': '4',
	    'five': '5',
	    'six': '6',
	    'seven': '7',
	    'eight': '8',
	    'nine': '9',
	}

	# Cardinal words for the round tens 20-90.
	TENS = {
	    'twenty': '20',
	    'thirty': '30',
	    'forty': '40',
	    'fifty': '50',
	    'sixty': '60',
	    'seventy': '70',
	    'eighty': '80',
	    'ninety': '90',
	}

	HUNDRED = {'hundred': '100'}

	# Every ordinal ("-th") word in one table.
	ALL_THS = dict(
	    list(ONETHS.items())
	    + list(TEENTHS.items())
	    + list(TENTHS.items())
	    + list(HUNDREDTH.items())
	)

	# Every recognised number word.  The group order is significant for prefix
	# matching: ordinal words first, then tens, then 'hundred', then ones, so
	# longer words ('sixth', 'sixty', 'hundredth') are seen before the shorter
	# words they start with ('six', 'hundred').
	ALL_ORDINALS = OrderedDict(
	    list(ALL_THS.items())
	    + list(TENS.items())
	    + list(HUNDRED.items())
	    + list(ONES.items())
	)


	def split_ordinal_word(word):
	    """Break ``word`` into the number words from ``ALL_ORDINALS`` that spell it.

	    Handles single words ('third') as well as fused words
	    ('fortythird' -> ['forty', 'third']).  The whole word has to be made of
	    known number words; otherwise an empty list is returned, so ordinary
	    words that merely begin with a number word ('oneida', 'sixteen') are not
	    mistaken for numbers.

	    :param word: a single token, expected in lower case.  Trailing
	        ``,.;:!?`` characters are ignored.
	    :return: list of ``ALL_ORDINALS`` keys in the order they occur in
	        ``word``, or ``[]`` when the word is not a number word.
	    """
	    if not word:
	        return []
	    # Tokens often carry trailing punctuation ('third,'); ignore it so the
	    # full-match requirement below does not reject them.
	    word = word.rstrip(',.;:!?')
	    if not word:
	        return []
	    for key in ALL_ORDINALS:
	        if not word.startswith(key):
	            continue
	        remainder = word[len(key):]
	        if not remainder:
	            return [key]
	        # Only accept this prefix if the rest of the word is number words
	        # too; otherwise fall through and try the next candidate key.
	        tail = split_ordinal_word(remainder)
	        if tail:
	            return [key] + tail
	    return []

	def get_ordinals(s):
	    """Split an address-like string around its first run of number words.

	    The text is lower-cased, hyphens are treated as spaces and the
	    stand-alone connector word 'and' is dropped, so
	    'One hundred and forty-third' yields the number words
	    ['one', 'hundred', 'forty', 'third'].

	    :param s: the text to scan, e.g. '32 sixty sixth toronto, on'.
	    :return: tuple ``(start, ordinals, end)`` of lists: the tokens before
	        the number words, the number words themselves (as returned by
	        ``split_ordinal_word``), and the tokens after them.
	    """
	    start, ordinals, end = [], [], []
	    # Lower-case before looking for 'and' so any capitalisation is caught,
	    # and compare whole tokens so words such as 'portland' stay intact.
	    # split() without arguments collapses every run of whitespace and gives
	    # [] for blank input, so no empty tokens reach the loop.
	    tokens = [tok for tok in s.lower().replace('-', ' ').split() if tok != 'and']
	    run_finished = False
	    for token in tokens:
	        found_ordinals = [] if run_finished else split_ordinal_word(token)
	        if found_ordinals:
	            ordinals += found_ordinals
	        elif ordinals:
	            # The first plain word after the number words closes the run;
	            # later number words (e.g. a unit number) stay in ``end``.
	            run_finished = True
	            end.append(token)
	        else:
	            start.append(token)
	    return start, ordinals, end


	def detect_ordinal_pattern(ordinals):
	    """Turn a list of number words into its numeric spelling, e.g. '143RD'.

	    Recognised shapes (``n`` is a word from ONES):

	    * a single known word                   -> its table value ('sixty' -> '60')
	    * n 'hundredth'                         -> 'n00TH'
	    * tens + ones-ordinal                   -> '43RD'
	    * ['n'] 'hundred' + ordinal below 100   -> '103RD', '110TH', '120TH'
	    * ['n'] 'hundred' + tens + ones-ordinal -> '143RD'

	    :param ordinals: sequence of lower-case number words, as produced by
	        ``get_ordinals``.
	    :return: the numeric string, or ``''`` when the words do not form one of
	        the shapes above.
	    """
	    count = len(ordinals)
	    if count == 1:
	        # A lone word maps straight through the lookup table; unknown words
	        # give '' like every other unrecognised input.
	        return ALL_ORDINALS.get(ordinals[0], '')
	    if count < 2 or count > 4:
	        return ''

	    head, last = list(ordinals[:-1]), ordinals[-1]

	    # 'n hundredth' -> 'n00TH'
	    if len(head) == 1 and head[0] in ONES and last in HUNDREDTH:
	        return ONES[head[0]] + '00TH'

	    # Peel off an optional hundreds prefix: bare 'hundred' or 'n hundred'.
	    hundreds = ''
	    if head[0] in HUNDRED:
	        hundreds, head = '1', head[1:]
	    elif len(head) >= 2 and head[0] in ONES and head[1] in HUNDRED:
	        hundreds, head = ONES[head[0]], head[2:]

	    if not head:
	        # Hundreds followed directly by the ordinal.  A ones-ordinal needs a
	        # zero in the tens place ('one hundred third' -> '103RD', not '13RD');
	        # teen and tens ordinals already carry two digits.
	        if last in ONETHS:
	            return hundreds + '0' + ONETHS[last]
	        if last in TEENTHS or last in TENTHS:
	            return hundreds + ALL_THS[last]
	        return ''

	    # [hundreds] tens ones-ordinal: keep only the tens digit of e.g. '40'.
	    if len(head) == 1 and head[0] in TENS and last in ONETHS:
	        return hundreds + TENS[head[0]][0] + ONETHS[last]
	    return ''


	# Sample inputs; assign one of these to ``s`` to try it.
	# s = '32 one hundred and forty-third st toronto, on'
	# s = '32 forty-third st toronto, on'
	# s = '32 one-hundredth st toronto, on'
	# s = '32 hundred and third st toronto, on'
	# s = '32 hundred and thirty first st toronto, on'
	# s = '32 nine hundred and twenty third st toronto, on'
	# s = '32 nine hundred and ninety ninth st toronto, on'
	s = '32 sixty sixth toronto, on'

	st, ords, en = get_ordinals(s)
	# Single-argument, parenthesised print calls produce the same output under
	# the Python 2 print statement and the Python 3 print function.
	print(ords)
	print('%s %s %s' % (st, detect_ordinal_pattern(ords), en))