Created
May 9, 2014 11:04
-
-
Save a1k0n/32c885b832a554331421 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import math | |
from collections import defaultdict | |
# ideally, we need to minimize the state changes | |
# so we need to track the transitions | |
s = sys.stdin.read() | |
rle = [] | |
freq = defaultdict(int) | |
maxrun = defaultdict(int) | |
transition = defaultdict(int) | |
lastc = None | |
run = 1 | |
for c in s: | |
freq[c] += 1 | |
if c == lastc: | |
run += 1 | |
maxrun[c] = max(maxrun[c], run) | |
else: | |
if lastc is not None: | |
a = chr(ord(min(lastc, c))) | |
b = chr(ord(max(lastc, c))) | |
if run > 10: | |
transition[(-1, a)] += 1 | |
transition[(-1, b)] += 1 | |
else: | |
transition[(a, b)] += 1 | |
rle.append((lastc, run)) | |
run = 1 | |
lastc = c | |
rle.append((lastc, run)) | |
print rle | |
chars = freq.keys() | |
N = len(chars) | |
freq = sorted([(f, c) for c, f in freq.iteritems()], reverse=True) | |
maxrun = sorted([(f, c) for c, f in maxrun.iteritems()], reverse=True) | |
print "chars", chars | |
print "freq", freq | |
print "maxrun", maxrun | |
edgelist = sorted([(f, c) for c, f in transition.iteritems()], reverse=True) | |
print "edgelist", edgelist | |
best = 1e30 | |
def tfreq(c1, c2): | |
a = chr(ord(min(c1, c2))) | |
b = chr(ord(max(c1, c2))) | |
return transition[(a, b)] | |
def optimalseq(soln, n, score): | |
global best | |
bestsoln = None | |
if n == N: | |
if score < best: | |
best = score | |
print score, soln | |
return soln | |
return None | |
for c in chars: | |
if c in soln: | |
continue | |
soln.append(c) | |
prevscore = score | |
for i in range(n): | |
score += abs(i-n) * tfreq(soln[i], c) | |
score += 2 * (n+1) * transition[(-1, c)] | |
if score < best: | |
s = optimalseq(soln, n+1, score) | |
if s is not None: | |
bestsoln = s[:] | |
soln.pop() | |
score = prevscore | |
return bestsoln | |
# first, output optimal sequence | |
soln = optimalseq([], 0, 0) | |
print soln | |
def factor(n): | |
f1 = int(math.sqrt(n)) | |
while f1 > 0: | |
f2 = n/f1 | |
if f1*f2 == n: | |
return max(f1, f2), min(f1, f2) | |
f1 += 1 | |
return n, 1 | |
def factor2(n): | |
bestl = None | |
f1 = int(math.sqrt(n)) | |
bestf1, bestf2, bestf3 = None, None, None | |
while f1 <= n: | |
f2 = n/f1 | |
f3 = n - f1*f2 | |
l = f1+f2+f3 | |
if bestl is None or l < bestl: | |
bestl = l | |
bestf1, bestf2, bestf3 = f1, f2, f3 | |
f1 += 1 | |
return bestf1, bestf2, bestf3 | |
def incrbatch(target, bfmem, n): | |
cmds = [] | |
limit = len(bfmem) | |
while limit > 0 and bfmem[limit-1] == target[limit-1]: | |
limit -= 1 | |
if limit == 0: | |
return | |
for i in range(limit): | |
cmds.append('>') | |
if bfmem[i] < target[i]: | |
cmds.append('+'*n) | |
cmds.append('<' * limit) | |
return ''.join(cmds) | |
def incrloop(target, bfmem, f1, f2): | |
cmds = [] | |
cmds.append('+'*f1) | |
cmds.append('[') | |
cmds.append(incrbatch(target, bfmem, f2)) | |
cmds.append('-]') | |
return ''.join(cmds) | |
def initbfmem(): | |
# now, we need to initialize these memory cells (hm, but if the frequency | |
# is low enough for some characters, is it worth it?) | |
bfmem = [0] * N | |
targetmem = map(ord, soln) | |
acc = 0 | |
while True: | |
left = [x for x in targetmem if x > acc] | |
if len(left) == 0: | |
break | |
m = min(left) - acc | |
if m == 0: | |
break | |
f1, f2 = factor(m) | |
# print m, f1, f2, bfmem | |
if f2 == 1: | |
print incrbatch(targetmem, bfmem, f1) | |
else: | |
print incrloop(targetmem, bfmem, f1, f2) | |
for i in range(N): | |
if bfmem[i] < targetmem[i]: | |
bfmem[i] += m | |
acc += m | |
print bfmem | |
# that works, but kind of sucks. to optimize, we need to generate the shortest | |
# sequence of: | |
# [k1, [k2], [k3]] where each memory location i is incremented by | |
# k1*k2[i] + k3[i] (where k2[i] and k3[i] can be zero) | |
# once k1 is determined, k2 and k3 are unique, so we need to find the optimal sequence of [k1,...] | |
# the length of the encoding is sum(k1[i]), sum(k2[i][j]) + sum(k3[i][j]) + m*len(k1) | |
# we could also have k2 or k3 be negative | |
# actually we shoudl definitely consider k3<0 | |
# and actually k3 only applies at the end of the sequence for each one anyway | |
# as there is nothing to be gained from putting it earlier | |
# so, hmm | |
# we want the largest k1 at each stage | |
def factorall(arr): | |
mostppl = 0 | |
bestk1 = None | |
bestk2 = None | |
n = min(arr) | |
while n >= 1: | |
k2 = [x/n for x in arr] | |
ppl = float(sum((x*n for x in k2)))/(n+sum(k2)) | |
# progress per length | |
# print n, ppl, k2 | |
if ppl > mostppl: | |
mostppl, bestk1, bestk2 = ppl, n, k2 | |
n -= 1 | |
a = [bestk2[i]*bestk1 for i in range(len(arr))] | |
return a, bestk1, bestk2 | |
arr, bestk1, bestk2 = factorall(map(ord, soln)) | |
print bestk1, bestk2, arr | |
init = ['+'*bestk1, '['] | |
for k in bestk2: | |
init.append('>') | |
init.append('+'*k) | |
init.append('<'*len(bestk2)) | |
init.append('-]') | |
print ''.join(init) | |
cursor = -1 | |
output = [] | |
for c, runlen in rle: | |
idx = soln.index(c) | |
cdiff = idx - cursor | |
# to do RLE: move cursor to -1, '+'*f1, '[', '>'*index, '.'*f2, '<'*index, '-]' | |
if runlen > 2: | |
f1, f2, f3 = factor2(runlen) | |
rlelen = cursor+1 + f1 + 3*idx + f2 + 3 | |
if f3 != 0: | |
rlelen += f3 | |
if rlelen < abs(cdiff)+runlen and ord(c) == arr[idx]: # can only do this after the memory locations are set | |
output.append('<'*(cursor+1)) | |
output.append('+'*f1) | |
output.append('[') | |
output.append('>'*(idx+1)) | |
output.append('.'*f2) | |
output.append('<'*(idx+1)) | |
output.append('-]') | |
if f3 != 0: | |
output.append('>'*(idx+1)) | |
output.append('.'*f3) | |
cursor = idx | |
else: | |
cursor = -1 | |
continue | |
if cdiff < 0: | |
output.append('<' * (-cdiff)) | |
elif cdiff > 0: | |
output.append('>' * cdiff) | |
cursor += cdiff | |
residue = ord(c) - arr[cursor] | |
if residue > 0: | |
output.append('+' * residue) | |
elif residue < 0: | |
output.append('-' * (-residue)) | |
arr[cursor] += residue | |
output.append('.'*runlen) | |
print ''.join(output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment