-
-
Save okaq/1130616 to your computer and use it in GitHub Desktop.
Solution: Runs (Google Code Jam 2011 World Finals Problem A)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
"" Solution: Runs (Google Code Jam 2011 World Finals Problem A) | |
"" by: [email protected] | |
"" run: python A.py infile outfile | |
""" | |
import string | |
import sys | |
from datetime import datetime | |
from itertools import permutations | |
# factorial | |
def factorial(n): | |
return reduce((lambda i,j: i*j), range(1,n+1)) | |
# binomial coefficient | |
def bc(n, k): | |
r = 1 | |
for i in range(1, k+1): | |
r = r * (n-i+1) / i | |
return r | |
# binomial coefficient (memoized) | |
# dict to store memoized data | |
bcmd = {} | |
def bcm(n, k): | |
k0 = bcmk(n, k) | |
if k0 in bcmd: | |
return bcmd[k0] | |
else: | |
bc0 = bc(n, k) | |
bcmd[k0] = bc0 | |
return bc0 | |
# create bcmd keys | |
def bcmk(n, k): | |
return "|".join([str(n), str(k)]) | |
# test bcm | |
# print bcmk(12,2) | |
# print bcm(12, 2) | |
""" | |
print bcmk(64, 8) | |
t0 = datetime.now() | |
print bc(164, 8) | |
t1 = datetime.now() | |
print "bc computed in %d ms" % ((t1 - t0).microseconds) | |
t0 = datetime.now() | |
print bcm(164, 8) | |
t1 = datetime.now() | |
print "bcm computed in %d ms" % ((t1 - t0).microseconds) | |
""" | |
# files | |
fin = file(sys.argv[1]) | |
fout = open(sys.argv[2], 'w') | |
lines = fin.readlines() | |
tests = int(lines[0]) | |
# parse | |
# data contains runs split from string | |
data = {} | |
for i in range(1,tests+1): | |
# print lines[i] | |
runs = [] | |
r0 = None | |
for s in lines[i]: | |
if r0 is None: | |
r0 = s | |
continue | |
if s == r0[0]: | |
# r0[len(s)] = s # item assignment not supported | |
# r0.append(s) # no method append | |
r0 = "".join([r0,s]) | |
else: | |
runs.append(r0) | |
r0 = s | |
data[i] = runs | |
# print "Case #%d: %d runs." % (i, len(runs)) | |
# print "Case #%d: %s" % (i, runs) | |
# print "Case #%d: factorial(%d) = %d" % (i,i,factorial(i)) | |
# print "Case: #%d: factorial(%d) = %d" % (i,len(runs),factorial(len(runs))) | |
# print "Case: #%d: factorial(%d) mod 1000003 = %d" % (i,len(runs),factorial(len(runs))%1000003) | |
# print "\n" | |
# print data | |
# total_runs is the runs count in the input string | |
total_runs = {} | |
for i in range(1, tests+1): | |
total_runs[i] = len(data[i]) | |
# print total_runs | |
# nc is the number of each character in the input string | |
nc = {} | |
for i in range(1, tests+1): | |
nc0 = {} | |
for j in range(0, len(data[i])): | |
# if nc0[data[i][j]] is None: | |
if data[i][j][0] not in nc0: | |
nc0[data[i][j][0]] = len(data[i][j]) | |
else: | |
nc0[data[i][j][0]] = nc0[data[i][j][0]] + len(data[i][j]) | |
nc[i] = nc0 | |
# print nc | |
# print bc(20,14) | |
# list data structure for character histogram | |
freqs = [] | |
ab = [s0 for s0 in string.ascii_lowercase] | |
# print ab | |
for i in range(1, tests+1): | |
freq = [0 for i0 in range(26)] | |
# print freq | |
for s0 in range(len(lines[i])): | |
try: | |
freq[ab.index(lines[i][s0])] += 1 | |
except ValueError: | |
continue | |
freqs.append(freq) | |
# print freqs | |
""" | |
# brute force ;( | |
# foreach permutation calc runs | |
i0 = 0 | |
for r0 in permutations(data[2]): | |
i0 += 1 | |
print r0 | |
print i0 | |
# 100! ~ 10^157 - too slow | |
""" | |
# optimization | |
# strategy generate statistics (len str, possible runs(exclusive), num permutations) | |
# solution from contest analysis: | |
# dynamic programming algo that runs iteratively upto original number of runs | |
# complicated, but fast | |
# count transitions | |
def ct(M, Nc, r0, r): | |
if r0 == 0: | |
# return r == 1 ? 1 : 0 | |
if r == 1: | |
return 1 | |
else: | |
return 0 | |
r1 = 0 | |
dr = r - r0 | |
y = 0 | |
while r0 + 2 * y <= r: | |
x = r - (r0 + 2 * y) | |
nwsx = bcm(r0 + 1, x) | |
nwsy = bcm(M - r0, y) | |
nws = bcm(Nc - 1, x + y - 1) | |
r1 += nwsx * nwsy * nws | |
y += 1 | |
return r1 | |
# solver | |
def solve(freq, runs_goal): | |
runs_count = [0 for i in range(0, runs_goal+1)] | |
runs_count[0] = 1 | |
M = 0 | |
for i, Nc in enumerate(freq): | |
if Nc > 0: | |
runs_count_prev = list(runs_count) | |
runs_count = [0 for i in range(0, runs_goal+1)] | |
r0 = 0 | |
while r0 <= runs_goal: | |
r = r0 + 1 | |
while r <= runs_goal: | |
nw = ct(M, Nc, r0, r) | |
runs_count[r] += nw * runs_count_prev[r0] | |
r += 1 | |
r0 += 1 | |
M += Nc | |
return runs_count[runs_goal] | |
# print solve(freqs[2], total_runs[3]) % 1000003 | |
# print timedelta | |
def td(t0, t1): | |
ms = (t0 - t1).microseconds | |
s = (t0 - t1).seconds | |
r = ".".join([str(s), str(ms)]) | |
return r | |
t0 = datetime.now() | |
for i in range(0, tests): | |
s0 = solve(freqs[i], total_runs[i+1]) | |
s0 = s0 % 1000003 | |
fout.write("Case #%d: %d\n" % (i+1, s0)) | |
t0s = datetime.now() | |
print "split time for case #%d: %s" % (i+1, td(t0s, t0)) | |
t1 = datetime.now() | |
print "total time: %s" % (td(t1,t0)) | |
""" | |
results: | |
small dataset runtime: 406.79s | |
large dataset runtime: 442.54s | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment