Last active
October 15, 2020 05:11
-
-
Save Faxn/8077830 to your computer and use it in GitHub Desktop.
We are given the following problem: Given a text file with 999,999 lines, one number per line,
numbers in random order from 1 to 1,000,000 but a single number is
missing, figure out what number is missing.
Source: http://blog.moertel.com/posts/2013-12-14-great-old-timey-game-programming-hack.html#comment-1165807320
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, locale, timeit, random | |
from math import * | |
""" | |
We are given the following problem: | |
Given a text file with 999,999 lines, one number per line, | |
numbers in random order from 1 to 1,000,000 but a single number is | |
missing, figure out what number is missing. | |
Source: http://blog.moertel.com/posts/2013-12-14-great-old-timey-game-programming-hack.html#comment-1165807320 | |
My results from running this script: | |
>python fun.py | |
The sum method took and average of 23.197442 seconds over 10 tries. | |
The count method took and average of 10.142114 seconds over 10 tries. | |
""" | |
######################################################################## | |
# Functions to generate test files | |
######################################################################## | |
def write_challenge_file( write_to, max_num, missing_num): | |
""" | |
Writes a file to the path specified with max_num-1 lines, each | |
containing a number between 1 and max_num inclusive. missing_num | |
is the only number in that range that isn't in the file. | |
This implementation puts the numbers sorted in order, but since | |
these inplementations don't takeadvantage of that that doesn't | |
matter for speed comparisions. | |
""" | |
digets = ceil(log10(max_num)) #number of digets to pad out to | |
fmtstr = "%0"+str(digets)+"d\n" | |
for i in range(1, max_num+1): | |
if i != missing_num: | |
write_to.write(fmtstr % i) | |
def write_challenge(write_to_filename, high_num, missing_num): | |
with open(write_to_filename, 'w') as out_file: | |
return write_challenge_file(out_file, high_num, missing_num) | |
######################################################################## | |
# Solution by Summing numbers | |
######################################################################## | |
def missing_sum(filename, max_num=1000000): | |
""" | |
Finds the missing number in the file by comparing the sum of all the | |
ints in the file to the sum of all integers from 1 to max_num. | |
""" | |
file_sum = 0 | |
with open(filename) as infile: | |
for line in infile: | |
file_sum += locale.atoi(line) | |
range_sum = max_num * (max_num+1) /2 | |
return range_sum - file_sum | |
######################################################################## | |
# Solution by character counting | |
######################################################################## | |
atoi_dict = {} | |
for i in range(0, 10): | |
atoi_dict[str(i)] = i | |
def get_count_list(lines, digets): | |
counts = [] #list[diget][value] | |
for i in range(0, digets): | |
counts.append([0]*10) | |
for line in lines: | |
for d in range(0, digets): | |
v = atoi_dict[line[d]] | |
counts[d][v] = counts[d][v]+1 | |
return counts | |
def get_full_count(max_num=1000000): | |
digets = ceil(log10(max_num)) | |
fmtstr = "%0"+str(digets)+"d\n" | |
return get_count_list(map(lambda x:fmtstr % x , range(1, max_num+1)), digets) | |
def missing_count(filename, max_num=1000000, full_count=None): | |
""" | |
Finds the missing number by counting the occurences of digets in a | |
2d list. | |
""" | |
digets = ceil(log10(max_num)) | |
if full_count==None: | |
full_count=get_full_count(max_num) | |
with open(filename, 'r') as infile: | |
count = get_count_list(infile, digets) | |
missingno = 0 | |
for d in range(0, len(count)): | |
for v in range(0, len(count[0])): | |
if full_count[d][v] != count[d][v]: | |
missingno += v * 10**(digets-d-1) | |
return missingno | |
######################################################################## | |
# main test | |
######################################################################## | |
def main(max_num=1000000, missingno=-1): | |
if missingno < 1: | |
missingno = random.randint(1, max_num) | |
challenge_filename = str(missingno)+".txt" | |
write_challenge(challenge_filename, max_num, missingno) | |
tries=1 | |
sum_time = timeit.timeit('fun.missing_sum("%s", %d)' %(challenge_filename, max_num), number = tries, setup = "import fun") | |
print("The sum method took and average of %f seconds over %d tries." %(sum_time/tries, tries)) | |
count_time = timeit.timeit('fun.missing_count("%s", %d)' %(challenge_filename, max_num), number = tries, setup = "import fun") | |
print("The count method took and average of %f seconds over %d tries." %(count_time/tries, tries)) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment