Skip to content

Instantly share code, notes, and snippets.

@okaq
Created August 7, 2011 18:15
Show Gist options
  • Save okaq/1130616 to your computer and use it in GitHub Desktop.
Save okaq/1130616 to your computer and use it in GitHub Desktop.
Solution: Runs (Google Code Jam 2011 World Finals Problem A)
"""
"" Solution: Runs (Google Code Jam 2011 World Finals Problem A)
"" by: [email protected]
"" run: python A.py infile outfile
"""
import string
import sys
from datetime import datetime
from itertools import permutations
# factorial
def factorial(n):
return reduce((lambda i,j: i*j), range(1,n+1))
# binomial coefficient
def bc(n, k):
r = 1
for i in range(1, k+1):
r = r * (n-i+1) / i
return r
# binomial coefficient (memoized)
# dict to store memoized data
bcmd = {}
def bcm(n, k):
k0 = bcmk(n, k)
if k0 in bcmd:
return bcmd[k0]
else:
bc0 = bc(n, k)
bcmd[k0] = bc0
return bc0
# create bcmd keys
def bcmk(n, k):
return "|".join([str(n), str(k)])
# test bcm
# print bcmk(12,2)
# print bcm(12, 2)
"""
print bcmk(64, 8)
t0 = datetime.now()
print bc(164, 8)
t1 = datetime.now()
print "bc computed in %d ms" % ((t1 - t0).microseconds)
t0 = datetime.now()
print bcm(164, 8)
t1 = datetime.now()
print "bcm computed in %d ms" % ((t1 - t0).microseconds)
"""
# files
fin = file(sys.argv[1])
fout = open(sys.argv[2], 'w')
lines = fin.readlines()
tests = int(lines[0])
# parse
# data contains runs split from string
data = {}
for i in range(1,tests+1):
# print lines[i]
runs = []
r0 = None
for s in lines[i]:
if r0 is None:
r0 = s
continue
if s == r0[0]:
# r0[len(s)] = s # item assignment not supported
# r0.append(s) # no method append
r0 = "".join([r0,s])
else:
runs.append(r0)
r0 = s
data[i] = runs
# print "Case #%d: %d runs." % (i, len(runs))
# print "Case #%d: %s" % (i, runs)
# print "Case #%d: factorial(%d) = %d" % (i,i,factorial(i))
# print "Case: #%d: factorial(%d) = %d" % (i,len(runs),factorial(len(runs)))
# print "Case: #%d: factorial(%d) mod 1000003 = %d" % (i,len(runs),factorial(len(runs))%1000003)
# print "\n"
# print data
# total_runs is the runs count in the input string
total_runs = {}
for i in range(1, tests+1):
total_runs[i] = len(data[i])
# print total_runs
# nc is the number of each character in the input string
nc = {}
for i in range(1, tests+1):
nc0 = {}
for j in range(0, len(data[i])):
# if nc0[data[i][j]] is None:
if data[i][j][0] not in nc0:
nc0[data[i][j][0]] = len(data[i][j])
else:
nc0[data[i][j][0]] = nc0[data[i][j][0]] + len(data[i][j])
nc[i] = nc0
# print nc
# print bc(20,14)
# list data structure for character histogram
freqs = []
ab = [s0 for s0 in string.ascii_lowercase]
# print ab
for i in range(1, tests+1):
freq = [0 for i0 in range(26)]
# print freq
for s0 in range(len(lines[i])):
try:
freq[ab.index(lines[i][s0])] += 1
except ValueError:
continue
freqs.append(freq)
# print freqs
"""
# brute force ;(
# foreach permutation calc runs
i0 = 0
for r0 in permutations(data[2]):
i0 += 1
print r0
print i0
# 100! ~ 10^157 - too slow
"""
# optimization
# strategy generate statistics (len str, possible runs(exclusive), num permutations)
# solution from contest analysis:
# dynamic programming algo that runs iteratively upto original number of runs
# complicated, but fast
# count transitions
def ct(M, Nc, r0, r):
if r0 == 0:
# return r == 1 ? 1 : 0
if r == 1:
return 1
else:
return 0
r1 = 0
dr = r - r0
y = 0
while r0 + 2 * y <= r:
x = r - (r0 + 2 * y)
nwsx = bcm(r0 + 1, x)
nwsy = bcm(M - r0, y)
nws = bcm(Nc - 1, x + y - 1)
r1 += nwsx * nwsy * nws
y += 1
return r1
# solver
def solve(freq, runs_goal):
runs_count = [0 for i in range(0, runs_goal+1)]
runs_count[0] = 1
M = 0
for i, Nc in enumerate(freq):
if Nc > 0:
runs_count_prev = list(runs_count)
runs_count = [0 for i in range(0, runs_goal+1)]
r0 = 0
while r0 <= runs_goal:
r = r0 + 1
while r <= runs_goal:
nw = ct(M, Nc, r0, r)
runs_count[r] += nw * runs_count_prev[r0]
r += 1
r0 += 1
M += Nc
return runs_count[runs_goal]
# print solve(freqs[2], total_runs[3]) % 1000003
# print timedelta
def td(t0, t1):
ms = (t0 - t1).microseconds
s = (t0 - t1).seconds
r = ".".join([str(s), str(ms)])
return r
t0 = datetime.now()
for i in range(0, tests):
s0 = solve(freqs[i], total_runs[i+1])
s0 = s0 % 1000003
fout.write("Case #%d: %d\n" % (i+1, s0))
t0s = datetime.now()
print "split time for case #%d: %s" % (i+1, td(t0s, t0))
t1 = datetime.now()
print "total time: %s" % (td(t1,t0))
"""
results:
small dataset runtime: 406.79s
large dataset runtime: 442.54s
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment