Skip to content

Instantly share code, notes, and snippets.

@Faxn
Last active October 15, 2020 05:11
Show Gist options
  • Save Faxn/8077830 to your computer and use it in GitHub Desktop.
Save Faxn/8077830 to your computer and use it in GitHub Desktop.
We are given the following problem: Given a text file with 999,999 lines, one number per line, numbers in random order from 1 to 1,000,000 but a single number is missing, figure out what number is missing. Source: http://blog.moertel.com/posts/2013-12-14-great-old-timey-game-programming-hack.html#comment-1165807320
import argparse, locale, timeit, random
from math import *
"""
We are given the following problem:
Given a text file with 999,999 lines, one number per line,
numbers in random order from 1 to 1,000,000 but a single number is
missing, figure out what number is missing.
Source: http://blog.moertel.com/posts/2013-12-14-great-old-timey-game-programming-hack.html#comment-1165807320
My results from running this script:
>python fun.py
The sum method took and average of 23.197442 seconds over 10 tries.
The count method took and average of 10.142114 seconds over 10 tries.
"""
########################################################################
# Functions to generate test files
########################################################################
def write_challenge_file( write_to, max_num, missing_num):
"""
Writes a file to the path specified with max_num-1 lines, each
containing a number between 1 and max_num inclusive. missing_num
is the only number in that range that isn't in the file.
This implementation puts the numbers sorted in order, but since
these inplementations don't takeadvantage of that that doesn't
matter for speed comparisions.
"""
digets = ceil(log10(max_num)) #number of digets to pad out to
fmtstr = "%0"+str(digets)+"d\n"
for i in range(1, max_num+1):
if i != missing_num:
write_to.write(fmtstr % i)
def write_challenge(write_to_filename, high_num, missing_num):
with open(write_to_filename, 'w') as out_file:
return write_challenge_file(out_file, high_num, missing_num)
########################################################################
# Solution by Summing numbers
########################################################################
def missing_sum(filename, max_num=1000000):
"""
Finds the missing number in the file by comparing the sum of all the
ints in the file to the sum of all integers from 1 to max_num.
"""
file_sum = 0
with open(filename) as infile:
for line in infile:
file_sum += locale.atoi(line)
range_sum = max_num * (max_num+1) /2
return range_sum - file_sum
########################################################################
# Solution by character counting
########################################################################
atoi_dict = {}
for i in range(0, 10):
atoi_dict[str(i)] = i
def get_count_list(lines, digets):
counts = [] #list[diget][value]
for i in range(0, digets):
counts.append([0]*10)
for line in lines:
for d in range(0, digets):
v = atoi_dict[line[d]]
counts[d][v] = counts[d][v]+1
return counts
def get_full_count(max_num=1000000):
digets = ceil(log10(max_num))
fmtstr = "%0"+str(digets)+"d\n"
return get_count_list(map(lambda x:fmtstr % x , range(1, max_num+1)), digets)
def missing_count(filename, max_num=1000000, full_count=None):
"""
Finds the missing number by counting the occurences of digets in a
2d list.
"""
digets = ceil(log10(max_num))
if full_count==None:
full_count=get_full_count(max_num)
with open(filename, 'r') as infile:
count = get_count_list(infile, digets)
missingno = 0
for d in range(0, len(count)):
for v in range(0, len(count[0])):
if full_count[d][v] != count[d][v]:
missingno += v * 10**(digets-d-1)
return missingno
########################################################################
# main test
########################################################################
def main(max_num=1000000, missingno=-1):
if missingno < 1:
missingno = random.randint(1, max_num)
challenge_filename = str(missingno)+".txt"
write_challenge(challenge_filename, max_num, missingno)
tries=1
sum_time = timeit.timeit('fun.missing_sum("%s", %d)' %(challenge_filename, max_num), number = tries, setup = "import fun")
print("The sum method took and average of %f seconds over %d tries." %(sum_time/tries, tries))
count_time = timeit.timeit('fun.missing_count("%s", %d)' %(challenge_filename, max_num), number = tries, setup = "import fun")
print("The count method took and average of %f seconds over %d tries." %(count_time/tries, tries))
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment