maxrothman · October 14, 2014 17:10
diff --git a/ita_word_numbers.py b/ita_word_numbers.py
 '''My solution to the ITA puzzle "Word Numbers"
 <http://www.itasoftware.com/careers/puzzle_archive.html>

 "If the integers from 1 to 999,999,999 are written as words,
 sorted alphabetically, and concatenated, what is the 51 billionth letter?" 
 
 To be precise: if the integers from 1 to 999,999,999 are expressed in words 
 (omitting spaces, 'and', and punctuation[1]), and sorted alphabetically 
 so that the first six integers are
 
 eight
 eighteen
 eighteenmillion
 eighteenmillioneight
 eighteenmillioneighteen
 eighteenmillioneighteenthousand
 ...
 twothousandtwohundredtwo

 then reading top to bottom, left to right, the 28th letter completes 
 the spelling of the integer "eighteenmillion".
 
 The 51 billionth letter also completes the spelling of an integer. 
 Which one, and what is the sum of all the integers to that point?
 
 [1] For example, 911,610,034 is written 
 "ninehundredelevenmillionsixhundredtenthousandthirtyfour"; 
 500,000,000 is written "fivehundredmillion"; 
 1,709 is written "onethousandsevenhundrednine".
 '''

 import heapq

 ones = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
 teens = ['ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen',
 		'seventeen', 'eighteen', 'nineteen']
 tens = ['twenty', 'thirty', 'fourty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
 hundred = ['hundred']
 mags = ['thousand', 'million']
 # 'start' must be higher than any other mags
 mags_val = {'thousand': 3, 'million': 6, 'billion': 9, 'start': 10}
 vals = {w:i+1 for i,w in enumerate(ones)}
 vals.update({w:i+10 for i,w in enumerate(teens)})
 vals.update({w:(i+2)*10 for i,w in enumerate(tens)})

 #cache of combined lists
 hundred_mags = hundred + mags
 ones_mags = ones + mags
 ones_teens_tens_mags = ones + teens + tens + mags
 ones_teens_tens = ones + teens + tens


 def generate_nums(cur_set, word=(), prev_mag='start', no_hundreds=False):
 	'''A generator that yields number words in alphabetical order'''
 	# How it works:
 	# 'potentials' represents a search front. First, 'potentials' is populated
 	# with all valid number words that start with 'word' and corresponding instances
 	# of generate_nums with 'word'=that word. Then, the alphabetically first one is yielded
 	# and replaced with a result produced by its generator.

 	potentials = []
 	new_prev_mag = prev_mag
 	new_no_hundreds = no_hundreds

 	# Generate a list of valid number words starting with <word> using <cur_set>
 	for num in cur_set:
 		# Ensure that 10^3ns appear in decreasing magnitude, and every time we hit a new mag, hundreds are allowed
 		if num in mags:
 			if not mags_val[num] < mags_val[prev_mag]:
 				continue
 			else:
 				new_prev_mag = num
 				new_no_hundreds = False
 		# Ensure hundreds don't appear in the wrong places
 		elif num in hundred or num in tens:
 			if no_hundreds:
 				continue
 			else:
 				new_no_hundreds = True

 		# Set the next set based on language patterns
 		#TODO: try inline cache
 		if num in ones:
 			new_set = hundred_mags
 		elif num in teens:
 			new_set = mags
 		elif num in tens:
 			new_set = ones_mags
 		elif num in hundred:
 			new_set = ones_teens_tens_mags
 		elif num in mags:
 			new_set = ones_teens_tens
 		else:
 			raise ValueError("cur_set has a num not in any sets: " + num)

 		# add the word to the list of potential minimums
 		heapq.heappush(potentials, (''.join(word+(num,)), word+(num,), generate_nums(new_set, word+(num,), new_prev_mag, new_no_hundreds)))

 	# As long as there are potentials in the list, get the minimum, then replace it with the result produced by its generator
 	while len(potentials) > 0:
 		_, min_word, min_word_gen = heapq.heappop(potentials)
 		yield min_word
 		try:
 			next_word = next(min_word_gen)
 			heapq.heappush(potentials, (''.join(next_word), next_word, min_word_gen))
 		except StopIteration:
 			pass

 def word_to_number(word):
 	'''Converts a list of words to the number they describe'''
 	total = running_total = 0
 	for w in word:
 		if w in vals:
 			running_total += vals[w]
 		elif w == 'hundred':
 			running_total *= 100
 		elif w in mags_val:
 			total += running_total * 10**mags_val[w]
 			running_total = 0
 	return total + running_total



 def main():
 	stop_count = 5e5
 	#stop_count = 51e9
 	print_mult = 0
 	
 	stop_count = int(stop_count)
 	total = letter_count = word_count = 0

 	for word in generate_nums(ones + teens + tens):
 		letter_count += sum(map(len, word))
 		word_count += 1
 		value = word_to_number(word)
 		total += value
 		if print_mult and word_count % print_mult == 0:
 			print "wc:{:.3e}  lc:{:.3e}  v:{:<8d}  t:{:.3e}  {}".format(word_count, letter_count, value, total, ''.join(word))
 			#pass
 		if letter_count >= stop_count:
 			word = ''.join(word)
 			diff = letter_count - stop_count
 			print('')
 			print('words: {:.3e}   letters: {:.3e}   total: {:d}'.format(word_count, letter_count, total))
 			#print("words: {:d} letters: {:d} total: {:d}".format(word_count, letter_count, total))
 			if diff == 0:
 				print(word[:-diff-1] + '\033[1;31m' + word[-diff-1] + '\033[m')
 			else:
 				print(word[:-diff-1] + '\033[1;31m' + word[-diff-1] + '\033[m' + word[-diff:])
 			
 			break


 def printem():
 	i=0
 	for word in generate_nums(ones + teens + tens):
 		print word
 		i += 1
 		if i>30: break


 if __name__ == "__main__":
 	main()
 	#print word_to_number(('eighteen', 'million', 'eighteen', 'thousand', 'eight', 'hundred', 'fifty', 'six'))
 	#printem()
	'''My solution to the ITA puzzle "Word Numbers"
	<http://www.itasoftware.com/careers/puzzle_archive.html>

	"If the integers from 1 to 999,999,999 are written as words,
	sorted alphabetically, and concatenated, what is the 51 billionth letter?"

	To be precise: if the integers from 1 to 999,999,999 are expressed in words
	(omitting spaces, 'and', and punctuation[1]), and sorted alphabetically
	so that the first six integers are

	eight
	eighteen
	eighteenmillion
	eighteenmillioneight
	eighteenmillioneighteen
	eighteenmillioneighteenthousand
	...
	twothousandtwohundredtwo

	then reading top to bottom, left to right, the 28th letter completes
	the spelling of the integer "eighteenmillion".

	The 51 billionth letter also completes the spelling of an integer.
	Which one, and what is the sum of all the integers to that point?

	[1] For example, 911,610,034 is written
	"ninehundredelevenmillionsixhundredtenthousandthirtyfour";
	500,000,000 is written "fivehundredmillion";
	1,709 is written "onethousandsevenhundrednine".
	'''

	import heapq

	ones = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
	teens = ['ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen',
	'seventeen', 'eighteen', 'nineteen']
	tens = ['twenty', 'thirty', 'fourty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
	hundred = ['hundred']
	mags = ['thousand', 'million']
	# 'start' must be higher than any other mags
	mags_val = {'thousand': 3, 'million': 6, 'billion': 9, 'start': 10}
	vals = {w:i+1 for i,w in enumerate(ones)}
	vals.update({w:i+10 for i,w in enumerate(teens)})
	vals.update({w:(i+2)*10 for i,w in enumerate(tens)})

	#cache of combined lists
	hundred_mags = hundred + mags
	ones_mags = ones + mags
	ones_teens_tens_mags = ones + teens + tens + mags
	ones_teens_tens = ones + teens + tens


	def generate_nums(cur_set, word=(), prev_mag='start', no_hundreds=False):
	'''A generator that yields number words in alphabetical order'''
	# How it works:
	# 'potentials' represents a search front. First, 'potentials' is populated
	# with all valid number words that start with 'word' and corresponding instances
	# of generate_nums with 'word'=that word. Then, the alphabetically first one is yielded
	# and replaced with a result produced by its generator.

	potentials = []
	new_prev_mag = prev_mag
	new_no_hundreds = no_hundreds

	# Generate a list of valid number words starting with <word> using <cur_set>
	for num in cur_set:
	# Ensure that 10^3ns appear in decreasing magnitude, and every time we hit a new mag, hundreds are allowed
	if num in mags:
	if not mags_val[num] < mags_val[prev_mag]:
	continue
	else:
	new_prev_mag = num
	new_no_hundreds = False
	# Ensure hundreds don't appear in the wrong places
	elif num in hundred or num in tens:
	if no_hundreds:
	continue
	else:
	new_no_hundreds = True

	# Set the next set based on language patterns
	#TODO: try inline cache
	if num in ones:
	new_set = hundred_mags
	elif num in teens:
	new_set = mags
	elif num in tens:
	new_set = ones_mags
	elif num in hundred:
	new_set = ones_teens_tens_mags
	elif num in mags:
	new_set = ones_teens_tens
	else:
	raise ValueError("cur_set has a num not in any sets: " + num)

	# add the word to the list of potential minimums
	heapq.heappush(potentials, (''.join(word+(num,)), word+(num,), generate_nums(new_set, word+(num,), new_prev_mag, new_no_hundreds)))

	# As long as there are potentials in the list, get the minimum, then replace it with the result produced by its generator
	while len(potentials) > 0:
	_, min_word, min_word_gen = heapq.heappop(potentials)
	yield min_word
	try:
	next_word = next(min_word_gen)
	heapq.heappush(potentials, (''.join(next_word), next_word, min_word_gen))
	except StopIteration:
	pass

	def word_to_number(word):
	'''Converts a list of words to the number they describe'''
	total = running_total = 0
	for w in word:
	if w in vals:
	running_total += vals[w]
	elif w == 'hundred':
	running_total *= 100
	elif w in mags_val:
	total += running_total * 10**mags_val[w]
	running_total = 0
	return total + running_total



	def main():
	stop_count = 5e5
	#stop_count = 51e9
	print_mult = 0

	stop_count = int(stop_count)
	total = letter_count = word_count = 0

	for word in generate_nums(ones + teens + tens):
	letter_count += sum(map(len, word))
	word_count += 1
	value = word_to_number(word)
	total += value
	if print_mult and word_count % print_mult == 0:
	print "wc:{:.3e} lc:{:.3e} v:{:<8d} t:{:.3e} {}".format(word_count, letter_count, value, total, ''.join(word))
	#pass
	if letter_count >= stop_count:
	word = ''.join(word)
	diff = letter_count - stop_count
	print('')
	print('words: {:.3e} letters: {:.3e} total: {:d}'.format(word_count, letter_count, total))
	#print("words: {:d} letters: {:d} total: {:d}".format(word_count, letter_count, total))
	if diff == 0:
	print(word[:-diff-1] + '\033[1;31m' + word[-diff-1] + '\033[m')
	else:
	print(word[:-diff-1] + '\033[1;31m' + word[-diff-1] + '\033[m' + word[-diff:])

	break


	def printem():
	i=0
	for word in generate_nums(ones + teens + tens):
	print word
	i += 1
	if i>30: break


	if __name__ == "__main__":
	main()
	#print word_to_number(('eighteen', 'million', 'eighteen', 'thousand', 'eight', 'hundred', 'fifty', 'six'))
	#printem()