a1k0n · May 9, 2014 11:04
diff --git a/ascii2bf.py b/ascii2bf.py
 import sys
 import math
 from collections import defaultdict

 # ideally, we need to minimize the state changes
 # so we need to track the transitions

 s = sys.stdin.read()
 rle = []
 freq = defaultdict(int)
 maxrun = defaultdict(int)
 transition = defaultdict(int)
 lastc = None
 run = 1
 for c in s:
    freq[c] += 1
    if c == lastc:
        run += 1
        maxrun[c] = max(maxrun[c], run)
    else:
        if lastc is not None:
            a = chr(ord(min(lastc, c)))
            b = chr(ord(max(lastc, c)))
            if run > 10:
                transition[(-1, a)] += 1
                transition[(-1, b)] += 1
            else:
                transition[(a, b)] += 1
            rle.append((lastc, run))
        run = 1
    lastc = c
 rle.append((lastc, run))
 print rle

 chars = freq.keys()
 N = len(chars)
 freq = sorted([(f, c) for c, f in freq.iteritems()], reverse=True)
 maxrun = sorted([(f, c) for c, f in maxrun.iteritems()], reverse=True)
 print "chars", chars
 print "freq", freq
 print "maxrun", maxrun

 edgelist = sorted([(f, c) for c, f in transition.iteritems()], reverse=True)
 print "edgelist", edgelist


 best = 1e30
 def tfreq(c1, c2):
    a = chr(ord(min(c1, c2)))
    b = chr(ord(max(c1, c2)))
    return transition[(a, b)]


 def optimalseq(soln, n, score):
    global best
    bestsoln = None
    if n == N:
        if score < best:
            best = score
            print score, soln
            return soln
        return None
    for c in chars:
        if c in soln:
            continue
        soln.append(c)
        prevscore = score
        for i in range(n):
            score += abs(i-n) * tfreq(soln[i], c)
        score += 2 * (n+1) * transition[(-1, c)]
        if score < best:
            s = optimalseq(soln, n+1, score)
            if s is not None:
                bestsoln = s[:]
        soln.pop()
        score = prevscore
    return bestsoln


 # first, output optimal sequence
 soln = optimalseq([], 0, 0)
 print soln


 def factor(n):
    f1 = int(math.sqrt(n))
    while f1 > 0:
        f2 = n/f1
        if f1*f2 == n:
            return max(f1, f2), min(f1, f2)
        f1 += 1
    return n, 1


 def factor2(n):
    bestl = None
    f1 = int(math.sqrt(n))
    bestf1, bestf2, bestf3 = None, None, None
    while f1 <= n:
        f2 = n/f1
        f3 = n - f1*f2
        l = f1+f2+f3
        if bestl is None or l < bestl:
            bestl = l
            bestf1, bestf2, bestf3 = f1, f2, f3
        f1 += 1
    return bestf1, bestf2, bestf3


 def incrbatch(target, bfmem, n):
    cmds = []
    limit = len(bfmem)
    while limit > 0 and bfmem[limit-1] == target[limit-1]:
        limit -= 1
    if limit == 0:
        return
    for i in range(limit):
        cmds.append('>')
        if bfmem[i] < target[i]:
            cmds.append('+'*n)
    cmds.append('<' * limit)
    return ''.join(cmds)


 def incrloop(target, bfmem, f1, f2):
    cmds = []
    cmds.append('+'*f1)
    cmds.append('[')
    cmds.append(incrbatch(target, bfmem, f2))
    cmds.append('-]')
    return ''.join(cmds)


 def initbfmem():
    # now, we need to initialize these memory cells (hm, but if the frequency
    # is low enough for some characters, is it worth it?)
    bfmem = [0] * N
    targetmem = map(ord, soln)
    acc = 0
    while True:
        left = [x for x in targetmem if x > acc]
        if len(left) == 0:
            break
        m = min(left) - acc
        if m == 0:
            break
        f1, f2 = factor(m)
        # print m, f1, f2, bfmem
        if f2 == 1:
            print incrbatch(targetmem, bfmem, f1)
        else:
            print incrloop(targetmem, bfmem, f1, f2)
        for i in range(N):
            if bfmem[i] < targetmem[i]:
                bfmem[i] += m
        acc += m
    print bfmem


 # that works, but kind of sucks.  to optimize, we need to generate the shortest
 # sequence of:
 # [k1, [k2], [k3]] where each memory location i is incremented by
 # k1*k2[i] + k3[i]  (where k2[i] and k3[i] can be zero)
 # once k1 is determined, k2 and k3 are unique, so we need to find the optimal sequence of [k1,...]
 # the length of the encoding is sum(k1[i]), sum(k2[i][j]) + sum(k3[i][j]) + m*len(k1)
 # we could also have k2 or k3 be negative
 # actually we shoudl definitely consider k3<0
 # and actually k3 only applies at the end of the sequence for each one anyway
 # as there is nothing to be gained from putting it earlier

 # so, hmm
 # we want the largest k1 at each stage
 def factorall(arr):
    mostppl = 0
    bestk1 = None
    bestk2 = None
    n = min(arr)
    while n >= 1:
        k2 = [x/n for x in arr]
        ppl = float(sum((x*n for x in k2)))/(n+sum(k2))
        # progress per length
        # print n, ppl, k2
        if ppl > mostppl:
            mostppl, bestk1, bestk2 = ppl, n, k2
        n -= 1
    a = [bestk2[i]*bestk1 for i in range(len(arr))]
    return a, bestk1, bestk2


 arr, bestk1, bestk2 = factorall(map(ord, soln))
 print bestk1, bestk2, arr

 init = ['+'*bestk1, '[']
 for k in bestk2:
    init.append('>')
    init.append('+'*k)
 init.append('<'*len(bestk2))
 init.append('-]')
 print ''.join(init)
 cursor = -1
 output = []
 for c, runlen in rle:
    idx = soln.index(c)
    cdiff = idx - cursor
    # to do RLE: move cursor to -1, '+'*f1, '[', '>'*index, '.'*f2, '<'*index, '-]'
    if runlen > 2:
        f1, f2, f3 = factor2(runlen)
        rlelen = cursor+1 + f1 + 3*idx + f2 + 3
        if f3 != 0:
            rlelen += f3
        if rlelen < abs(cdiff)+runlen and ord(c) == arr[idx]:  # can only do this after the memory locations are set
            output.append('<'*(cursor+1))
            output.append('+'*f1)
            output.append('[')
            output.append('>'*(idx+1))
            output.append('.'*f2)
            output.append('<'*(idx+1))
            output.append('-]')
            if f3 != 0:
                output.append('>'*(idx+1))
                output.append('.'*f3)
                cursor = idx
            else:
                cursor = -1
            continue
    if cdiff < 0:
        output.append('<' * (-cdiff))
    elif cdiff > 0:
        output.append('>' * cdiff)
    cursor += cdiff
    residue = ord(c) - arr[cursor]
    if residue > 0:
        output.append('+' * residue)
    elif residue < 0:
        output.append('-' * (-residue))
    arr[cursor] += residue
    output.append('.'*runlen)
 print ''.join(output)
	import sys
	import math
	from collections import defaultdict

	# ideally, we need to minimize the state changes
	# so we need to track the transitions

	s = sys.stdin.read()
	rle = []
	freq = defaultdict(int)
	maxrun = defaultdict(int)
	transition = defaultdict(int)
	lastc = None
	run = 1
	for c in s:
	freq[c] += 1
	if c == lastc:
	run += 1
	maxrun[c] = max(maxrun[c], run)
	else:
	if lastc is not None:
	a = chr(ord(min(lastc, c)))
	b = chr(ord(max(lastc, c)))
	if run > 10:
	transition[(-1, a)] += 1
	transition[(-1, b)] += 1
	else:
	transition[(a, b)] += 1
	rle.append((lastc, run))
	run = 1
	lastc = c
	rle.append((lastc, run))
	print rle

	chars = freq.keys()
	N = len(chars)
	freq = sorted([(f, c) for c, f in freq.iteritems()], reverse=True)
	maxrun = sorted([(f, c) for c, f in maxrun.iteritems()], reverse=True)
	print "chars", chars
	print "freq", freq
	print "maxrun", maxrun

	edgelist = sorted([(f, c) for c, f in transition.iteritems()], reverse=True)
	print "edgelist", edgelist


	best = 1e30
	def tfreq(c1, c2):
	a = chr(ord(min(c1, c2)))
	b = chr(ord(max(c1, c2)))
	return transition[(a, b)]


	def optimalseq(soln, n, score):
	global best
	bestsoln = None
	if n == N:
	if score < best:
	best = score
	print score, soln
	return soln
	return None
	for c in chars:
	if c in soln:
	continue
	soln.append(c)
	prevscore = score
	for i in range(n):
	score += abs(i-n) * tfreq(soln[i], c)
	score += 2 * (n+1) * transition[(-1, c)]
	if score < best:
	s = optimalseq(soln, n+1, score)
	if s is not None:
	bestsoln = s[:]
	soln.pop()
	score = prevscore
	return bestsoln


	# first, output optimal sequence
	soln = optimalseq([], 0, 0)
	print soln


	def factor(n):
	f1 = int(math.sqrt(n))
	while f1 > 0:
	f2 = n/f1
	if f1*f2 == n:
	return max(f1, f2), min(f1, f2)
	f1 += 1
	return n, 1


	def factor2(n):
	bestl = None
	f1 = int(math.sqrt(n))
	bestf1, bestf2, bestf3 = None, None, None
	while f1 <= n:
	f2 = n/f1
	f3 = n - f1*f2
	l = f1+f2+f3
	if bestl is None or l < bestl:
	bestl = l
	bestf1, bestf2, bestf3 = f1, f2, f3
	f1 += 1
	return bestf1, bestf2, bestf3


	def incrbatch(target, bfmem, n):
	cmds = []
	limit = len(bfmem)
	while limit > 0 and bfmem[limit-1] == target[limit-1]:
	limit -= 1
	if limit == 0:
	return
	for i in range(limit):
	cmds.append('>')
	if bfmem[i] < target[i]:
	cmds.append('+'*n)
	cmds.append('<' * limit)
	return ''.join(cmds)


	def incrloop(target, bfmem, f1, f2):
	cmds = []
	cmds.append('+'*f1)
	cmds.append('[')
	cmds.append(incrbatch(target, bfmem, f2))
	cmds.append('-]')
	return ''.join(cmds)


	def initbfmem():
	# now, we need to initialize these memory cells (hm, but if the frequency
	# is low enough for some characters, is it worth it?)
	bfmem = [0] * N
	targetmem = map(ord, soln)
	acc = 0
	while True:
	left = [x for x in targetmem if x > acc]
	if len(left) == 0:
	break
	m = min(left) - acc
	if m == 0:
	break
	f1, f2 = factor(m)
	# print m, f1, f2, bfmem
	if f2 == 1:
	print incrbatch(targetmem, bfmem, f1)
	else:
	print incrloop(targetmem, bfmem, f1, f2)
	for i in range(N):
	if bfmem[i] < targetmem[i]:
	bfmem[i] += m
	acc += m
	print bfmem


	# that works, but kind of sucks. to optimize, we need to generate the shortest
	# sequence of:
	# [k1, [k2], [k3]] where each memory location i is incremented by
	# k1*k2[i] + k3[i] (where k2[i] and k3[i] can be zero)
	# once k1 is determined, k2 and k3 are unique, so we need to find the optimal sequence of [k1,...]
	# the length of the encoding is sum(k1[i]), sum(k2[i][j]) + sum(k3[i][j]) + m*len(k1)
	# we could also have k2 or k3 be negative
	# actually we shoudl definitely consider k3<0
	# and actually k3 only applies at the end of the sequence for each one anyway
	# as there is nothing to be gained from putting it earlier

	# so, hmm
	# we want the largest k1 at each stage
	def factorall(arr):
	mostppl = 0
	bestk1 = None
	bestk2 = None
	n = min(arr)
	while n >= 1:
	k2 = [x/n for x in arr]
	ppl = float(sum((x*n for x in k2)))/(n+sum(k2))
	# progress per length
	# print n, ppl, k2
	if ppl > mostppl:
	mostppl, bestk1, bestk2 = ppl, n, k2
	n -= 1
	a = [bestk2[i]*bestk1 for i in range(len(arr))]
	return a, bestk1, bestk2


	arr, bestk1, bestk2 = factorall(map(ord, soln))
	print bestk1, bestk2, arr

	init = ['+'*bestk1, '[']
	for k in bestk2:
	init.append('>')
	init.append('+'*k)
	init.append('<'*len(bestk2))
	init.append('-]')
	print ''.join(init)
	cursor = -1
	output = []
	for c, runlen in rle:
	idx = soln.index(c)
	cdiff = idx - cursor
	# to do RLE: move cursor to -1, '+'f1, '[', '>'index, '.'f2, '<'index, '-]'
	if runlen > 2:
	f1, f2, f3 = factor2(runlen)
	rlelen = cursor+1 + f1 + 3*idx + f2 + 3
	if f3 != 0:
	rlelen += f3
	if rlelen < abs(cdiff)+runlen and ord(c) == arr[idx]: # can only do this after the memory locations are set
	output.append('<'*(cursor+1))
	output.append('+'*f1)
	output.append('[')
	output.append('>'*(idx+1))
	output.append('.'*f2)
	output.append('<'*(idx+1))
	output.append('-]')
	if f3 != 0:
	output.append('>'*(idx+1))
	output.append('.'*f3)
	cursor = idx
	else:
	cursor = -1
	continue
	if cdiff < 0:
	output.append('<' * (-cdiff))
	elif cdiff > 0:
	output.append('>' * cdiff)
	cursor += cdiff
	residue = ord(c) - arr[cursor]
	if residue > 0:
	output.append('+' * residue)
	elif residue < 0:
	output.append('-' * (-residue))
	arr[cursor] += residue
	output.append('.'*runlen)
	print ''.join(output)