phoeagon · November 17, 2015 03:09
diff --git a/input b/input
 (5,6,5,8,5,7)
 oenioa
 nahpnc
 ecgcen
 nradrr
 rtrosh
 ohdbur

 (4,5)
 rah
 psa
 clw



 lac
 kre
 cof

 thg
 mif
 ous

 eyr
 icr
 rcu

 gni
 ber
 xor

 cai
 htr
 aes

 bnp
 tor
 koi

 nai
 thg
 uge

 (3,6)
 pro
 aef
 sst

 (5,4)
 hry
 arr
 ifu

 mth
 eyn
 mit

 upc
 rsa
 seh

 car
 osf
 tob

 (6,3)
 gna
 eri
 ogf

 (5,4)
 ael
 fad
 sla
 (3,6)
 IID
 ADK
 PER

 (4,5)
 pca
 ato
 rka

 (3,6)
 oub
 eit
 ntt

 (5,4)
 wii
 nfp
 eoh

 (5,4)
 ene
 csa
 ply

 (3,6)
 rbu
 cpt
 uet

 (5,4)
 erb
 ige
 tar

 #########
 (3, 6)
 ini
 uki
 nsb
 ###########

 #[[' ', ' ', 'b'], [' ', 'k', 'i'], ['i', 'n', 'i']]

 nup
 gou
 ebl

 vif
 eri
 rsh

 ner
 imv
 cae

 (3,6)
 efc
 eeo
 atf

 (4,5)
 wee
 svt
 loe

 eoh
 ynl
 god

 elp
 ifa
 drk

 cim
 enm
 lab

 ppe
 paa
 ler

 sny
 unb
 nur
 (4,5)
 erp
 tel
 tna

 (5,4)
 eah
 bct
 kie

 (4,5)
 llo
 arh
 tst

 (4,5)
 let
 raa
 kew
diff --git a/solve.py b/solve.py

 import itertools
 import pprint
 import nltk
 import dill
 import copy
 from pathos.multiprocessing import ProcessingPool as Pool

 DICT_FILE=['/Users/zheq/Downloads/words', '/run/shm/words','/etc/dictionaries-common/words']

 DIR=list(itertools.product([1,-1,0], repeat=2))
 DIR.remove((0,0))

 def get_dict_file():
    for path in DICT_FILE:
        try:
            return open(path, 'r')
        except:
            pass
    return None

 def get_freq_dict():
    text = nltk.corpus.gutenberg.words()
    fdisk = nltk.FreqDist(w.lower() for w in text)
    print "Loaded frequency dict"
    return fdisk

 def get_word_lens():
    lens_tuple = input('Please input the lengths of each word, as tuple ')
    return sorted(lens_tuple)

 def get_dict(f):
    return {word.strip() for word in f.readlines()}

 def get_prefix_dict(dictionary):
    prefix_dict = set()
    for word in dictionary:
        for i in range(1, len(word)):
            prefix_dict.add(word[:i])
    prefix_dict.add('')
    return prefix_dict

 def get_board(total_len):
    cur_len = 0
    board = []
    while cur_len < total_len:
        line = raw_input().lower()
        cur_len += len(line)
        board.append(list(line))
    board.reverse()
    return board

 def work_multi(board, word_lens, dictionary):
    n = len(board)
    m = len(board[0])
    global_ans = []
    prefix_dict = get_prefix_dict(dictionary)

    def work_sg(pos, word_lens):
        global_ans = []
        #for word_lens in set(itertools.permutations(word_lens)):
        dfs(board, n, m, pos[0], pos[1], '', ' ', [], word_lens,
            global_ans, dictionary, prefix_dict)
        return global_ans

    return reduce(list.__add__,
        Pool().map(work_sg, list(itertools.product(range(n), range(m))),
            [word_lens] * (n*m)))


 def work(board, word_lens, dictionary):
    n = len(board)
    m = len(board[0])
    global_ans = []
    prefix_dict = get_prefix_dict(dictionary)
    try:
        for i in range(n):
            for j in range(m):
                #for word_lens in set(itertools.permutations(word_lens)):
                    dfs(board, n, m, i, j, '', ' ', [], word_lens,
                        global_ans, dictionary, prefix_dict)
    except KeyboardInterrupt:
        print global_ans
    return global_ans

 def clear_selected(board, del_c, height):
    board = [list(line) for line in zip(*board)]
    for i in range(len(board)):
        board[i] = filter(lambda x: x != del_c, board[i])
    for i in range(len(board)):
        board[i] += [del_c] * (height - len(board[i]))
    return [list(line) for line in zip(*board)]

 def count_connect(board, n, m, x, y, del_c):
    if x >= 0 and x < n and y >= 0 and y < m:
        if (board[x][y] != del_c):
            tmp , board[x][y] = board[x][y] , del_c
            ans = 1 + sum([count_connect(board, n, m,
                x + diff[0], y + diff[1], del_c) for diff in DIR])
            return ans
    return 0

 def subsets(mySet):
    return reduce(lambda z, x: z + [y + [x] for y in z], mySet, [[]])

 def test_is_possible(connect_count, word_lens):
    if connect_count < word_lens[0]:
        return False
    possible_sum = {sum(x) for x in subsets(word_lens[1:])}
    possible_sum.add(0)
    # print possible_sum, word_lens, connect_count, connect_count - word_lens[0]
    if (connect_count - word_lens[0]) not in possible_sum :
        return False
    return True

 def dfs(board, n, m, x, y, cur, del_c, solutions, word_lens, global_ans,
    dictionary, prefix_dict):
    if cur not in prefix_dict:
        return
    if x >= 0 and x < n and y >= 0 and y < m:
        if board[x][y] != del_c:
            tmp_c = board[x][y]
            cur += tmp_c
            board[x][y] = del_c
            # If len fullfilled
            if len(cur) in word_lens:
                if cur in dictionary:
                    if len(word_lens) == 1:
                        # All keyboard searched
                        global_ans.append(solutions + [cur])
                    else:
                        # If not, still look for the next word
                        new_board = clear_selected(board, del_c, n)
                        for u in range(n):
                            for v in range(m):
                                if board[u][v] != del_c:
                                    tmp_board = copy.deepcopy(new_board)
                                    cnt = count_connect(tmp_board, n, m, u, v, del_c)
                                    word_lens.remove(len(cur))
                                    if test_is_possible(cnt, word_lens):
                                        dfs(new_board, n, m, u, v, '', del_c,
                                            solutions + [cur], word_lens,
                                            global_ans, dictionary, prefix_dict)
                                    word_lens.append(len(cur))
            # Either way, search for the next letter
            for diff in DIR:
                i = x + diff[0]
                j = y + diff[1]
                dfs(board, n, m, i, j, cur, del_c, solutions,
                    word_lens, global_ans, dictionary, prefix_dict)
            board[x][y] = tmp_c


 def main():
    dictionary = get_dict(get_dict_file())
    word_lens = get_word_lens()
    board = get_board(sum(word_lens))
    try:
        ans = work_multi(board, word_lens, dictionary)
    except:
        print "Fall back to single thread"
        ans = work(board, word_lens, dictionary)
    pprint.pprint(ans)
    print "\n" * 3
    fdisk = get_freq_dict()
    ans = sorted(ans, key=lambda x: -min(fdisk[xx] for xx in x))
    pprint.pprint(ans[:20])

 if __name__ == '__main__':
    main()
	(5,6,5,8,5,7)
	oenioa
	nahpnc
	ecgcen
	nradrr
	rtrosh
	ohdbur

	(4,5)
	rah
	psa
	clw



	lac
	kre
	cof

	thg
	mif
	ous

	eyr
	icr
	rcu

	gni
	ber
	xor

	cai
	htr
	aes

	bnp
	tor
	koi

	nai
	thg
	uge

	(3,6)
	pro
	aef
	sst

	(5,4)
	hry
	arr
	ifu

	mth
	eyn
	mit

	upc
	rsa
	seh

	car
	osf
	tob

	(6,3)
	gna
	eri
	ogf

	(5,4)
	ael
	fad
	sla
	(3,6)
	IID
	ADK
	PER

	(4,5)
	pca
	ato
	rka

	(3,6)
	oub
	eit
	ntt

	(5,4)
	wii
	nfp
	eoh

	(5,4)
	ene
	csa
	ply

	(3,6)
	rbu
	cpt
	uet

	(5,4)
	erb
	ige
	tar

	#########
	(3, 6)
	ini
	uki
	nsb
	###########

	#[[' ', ' ', 'b'], [' ', 'k', 'i'], ['i', 'n', 'i']]

	nup
	gou
	ebl

	vif
	eri
	rsh

	ner
	imv
	cae

	(3,6)
	efc
	eeo
	atf

	(4,5)
	wee
	svt
	loe

	eoh
	ynl
	god

	elp
	ifa
	drk

	cim
	enm
	lab

	ppe
	paa
	ler

	sny
	unb
	nur
	(4,5)
	erp
	tel
	tna

	(5,4)
	eah
	bct
	kie

	(4,5)
	llo
	arh
	tst

	(4,5)
	let
	raa
	kew

	import itertools
	import pprint
	import nltk
	import dill
	import copy
	from pathos.multiprocessing import ProcessingPool as Pool

	DICT_FILE=['/Users/zheq/Downloads/words', '/run/shm/words','/etc/dictionaries-common/words']

	DIR=list(itertools.product([1,-1,0], repeat=2))
	DIR.remove((0,0))

	def get_dict_file():
	for path in DICT_FILE:
	try:
	return open(path, 'r')
	except:
	pass
	return None

	def get_freq_dict():
	text = nltk.corpus.gutenberg.words()
	fdisk = nltk.FreqDist(w.lower() for w in text)
	print "Loaded frequency dict"
	return fdisk

	def get_word_lens():
	lens_tuple = input('Please input the lengths of each word, as tuple ')
	return sorted(lens_tuple)

	def get_dict(f):
	return {word.strip() for word in f.readlines()}

	def get_prefix_dict(dictionary):
	prefix_dict = set()
	for word in dictionary:
	for i in range(1, len(word)):
	prefix_dict.add(word[:i])
	prefix_dict.add('')
	return prefix_dict

	def get_board(total_len):
	cur_len = 0
	board = []
	while cur_len < total_len:
	line = raw_input().lower()
	cur_len += len(line)
	board.append(list(line))
	board.reverse()
	return board

	def work_multi(board, word_lens, dictionary):
	n = len(board)
	m = len(board[0])
	global_ans = []
	prefix_dict = get_prefix_dict(dictionary)

	def work_sg(pos, word_lens):
	global_ans = []
	#for word_lens in set(itertools.permutations(word_lens)):
	dfs(board, n, m, pos[0], pos[1], '', ' ', [], word_lens,
	global_ans, dictionary, prefix_dict)
	return global_ans

	return reduce(list.__add__,
	Pool().map(work_sg, list(itertools.product(range(n), range(m))),
	[word_lens] * (n*m)))


	def work(board, word_lens, dictionary):
	n = len(board)
	m = len(board[0])
	global_ans = []
	prefix_dict = get_prefix_dict(dictionary)
	try:
	for i in range(n):
	for j in range(m):
	#for word_lens in set(itertools.permutations(word_lens)):
	dfs(board, n, m, i, j, '', ' ', [], word_lens,
	global_ans, dictionary, prefix_dict)
	except KeyboardInterrupt:
	print global_ans
	return global_ans

	def clear_selected(board, del_c, height):
	board = [list(line) for line in zip(*board)]
	for i in range(len(board)):
	board[i] = filter(lambda x: x != del_c, board[i])
	for i in range(len(board)):
	board[i] += [del_c] * (height - len(board[i]))
	return [list(line) for line in zip(*board)]

	def count_connect(board, n, m, x, y, del_c):
	if x >= 0 and x < n and y >= 0 and y < m:
	if (board[x][y] != del_c):
	tmp , board[x][y] = board[x][y] , del_c
	ans = 1 + sum([count_connect(board, n, m,
	x + diff[0], y + diff[1], del_c) for diff in DIR])
	return ans
	return 0

	def subsets(mySet):
	return reduce(lambda z, x: z + [y + [x] for y in z], mySet, [[]])

	def test_is_possible(connect_count, word_lens):
	if connect_count < word_lens[0]:
	return False
	possible_sum = {sum(x) for x in subsets(word_lens[1:])}
	possible_sum.add(0)
	# print possible_sum, word_lens, connect_count, connect_count - word_lens[0]
	if (connect_count - word_lens[0]) not in possible_sum :
	return False
	return True

	def dfs(board, n, m, x, y, cur, del_c, solutions, word_lens, global_ans,
	dictionary, prefix_dict):
	if cur not in prefix_dict:
	return
	if x >= 0 and x < n and y >= 0 and y < m:
	if board[x][y] != del_c:
	tmp_c = board[x][y]
	cur += tmp_c
	board[x][y] = del_c
	# If len fullfilled
	if len(cur) in word_lens:
	if cur in dictionary:
	if len(word_lens) == 1:
	# All keyboard searched
	global_ans.append(solutions + [cur])
	else:
	# If not, still look for the next word
	new_board = clear_selected(board, del_c, n)
	for u in range(n):
	for v in range(m):
	if board[u][v] != del_c:
	tmp_board = copy.deepcopy(new_board)
	cnt = count_connect(tmp_board, n, m, u, v, del_c)
	word_lens.remove(len(cur))
	if test_is_possible(cnt, word_lens):
	dfs(new_board, n, m, u, v, '', del_c,
	solutions + [cur], word_lens,
	global_ans, dictionary, prefix_dict)
	word_lens.append(len(cur))
	# Either way, search for the next letter
	for diff in DIR:
	i = x + diff[0]
	j = y + diff[1]
	dfs(board, n, m, i, j, cur, del_c, solutions,
	word_lens, global_ans, dictionary, prefix_dict)
	board[x][y] = tmp_c


	def main():
	dictionary = get_dict(get_dict_file())
	word_lens = get_word_lens()
	board = get_board(sum(word_lens))
	try:
	ans = work_multi(board, word_lens, dictionary)
	except:
	print "Fall back to single thread"
	ans = work(board, word_lens, dictionary)
	pprint.pprint(ans)
	print "\n" * 3
	fdisk = get_freq_dict()
	ans = sorted(ans, key=lambda x: -min(fdisk[xx] for xx in x))
	pprint.pprint(ans[:20])

	if __name__ == '__main__':
	main()