Last active
August 29, 2015 14:10
-
-
Save libbkmz/bad287217b8fe352fe96 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from copy import copy, deepcopy | |
from trie import Trie | |
from itertools import chain | |
STATE = [ | |
u"", u"", u"т", u"", u"ч", | |
u"", u"м", u"а", u"е", u"у", | |
u"м", u"о", u"л", u"о", u"т", | |
u"ш", u"", u"к", u"ь", u"х", | |
u"", u"е", u"у", u"", u"", | |
] | |
STATE = [ | |
u"б", u"", u"", u"", u"", | |
u"а", u"г", u"я", u"п", u"", | |
u"т", u"е", u"р", u"а", u"и", | |
u"а", u"м", u"о", u"к", u"", | |
u"к", u"", u"й", u"", u"", | |
] | |
STATE = [ | |
u"а", u"л", u"е", u"м", u"о", | |
u"к", u"о", u"к", u"ф", u"п", | |
u"э", u"ш", u"а", u"р", u"п", | |
u"с", u"т", u"а", u"м", u"п", | |
u"", u"а", u"н", u"ы", u"в", | |
] | |
STATE2 = [ | |
u"", u"", u"", u"", u"и", | |
u"", u"", u"", u"и", u"ц", | |
u"п", u"р", u"а", u"т", u"а", | |
u"", u"и", u"в", u"и", u"з", | |
u"", u"", u"", u"", u"", | |
] | |
STATE2 = [ | |
u"", u"д", u"а", u"", u"", | |
u"а", u"", u"", u"", u"", | |
u"б", u"", u"", u"", u"", | |
u"", u"", u"", u"", u"", | |
u"", u"", u"", u"", u"", | |
] | |
STATE_SIZE = 5 | |
STATE_LEN = len(STATE) | |
# alphabet = u"абвгдеёжзийклмнопрстуфхцчшщъыьэюя" | |
alphabet = u"л" | |
utr = Trie() | |
tr = Trie() | |
tr_inv = Trie() | |
hl = open("dict", "r") | |
# tr.append(u"ломоть") | |
print "Building trees..." | |
for line in hl.readlines(): | |
line = line.rstrip("\n\r").decode("utf-8") | |
tr.append(line) | |
for i in reversed(xrange(len(line))): | |
# print line[i::-1] | |
tr_inv.append(line[i::-1]) | |
print (u"лаб" in tr_inv) | |
print (tr_inv.exist(u"ла")) | |
print "Tries builed, sir!" | |
def traverse(state, index, path, res, ipath): | |
global utr | |
if state[index] == u"": | |
# raise ValueError | |
return | |
state = copy(state) | |
ipath = copy(ipath) | |
out = state[index] | |
ipath.append(index) | |
path = copy(path) | |
path.append(out) | |
str_path = "".join(path) | |
# print state[index] | |
# print index | |
# GC | |
# del state[index] | |
state[index] = u"" | |
# import ipdb; ipdb.set_trace() | |
# import ipdb; ipdb.set_trace() | |
# print "%s[%s]: %s" % (out, index, "".join(path)) | |
if len(path) > 1 and (not str_path in utr) and str_path in tr: | |
res.append([str_path, ipath]) | |
utr.append(str_path) | |
if index >= 1 and state[index-1] and (not index % STATE_SIZE == 0): | |
traverse(state, index-1, path, res, ipath) | |
if index < STATE_LEN-1 and state[index+1] and (not index+1 % STATE_SIZE == 0): | |
traverse(state, index+1, path, res, ipath) | |
if index >= STATE_SIZE and state[index-STATE_SIZE]: | |
traverse(state, index-STATE_SIZE, path, res, ipath) | |
if index < STATE_LEN-STATE_SIZE and state[index+STATE_SIZE]: | |
traverse(state, index+STATE_SIZE, path, res, ipath) | |
# if index == 18: | |
# import ipdb; ipdb.set_trace() | |
# print "".join(path) | |
# return "".join(path) | |
return res | |
def test_direction(state, index): | |
if index >= 1 and state[index-1] and (not index % STATE_SIZE == 0): | |
return True | |
if index < STATE_LEN-1 and state[index+1] and (not index % STATE_SIZE == 0): | |
return True | |
if index >= STATE_SIZE and state[index-STATE_SIZE]: | |
return True | |
if index < STATE_LEN-STATE_SIZE and state[index+STATE_SIZE]: | |
return True | |
def reverse_callback(state, path, ipath): | |
state = copy(state) | |
path = copy(path) | |
ipath = copy(ipath) | |
return traverse(state, ipath[-1], path[:-1], [], ipath[:-1]) | |
def reverse_search(state, path, ipath): | |
return reverse_callback(state, list(reversed(path)), list(reversed(ipath))) | |
def traverse_o(state, index, path, res, ipath): | |
state = copy(state) | |
ipath = copy(ipath) | |
path = copy(path) | |
out = state[index] | |
path.append(out) | |
ipath.append(index) | |
str_path = "".join(path) | |
if index >= 1 and state[index-1] and (not index % STATE_SIZE == 0): | |
if tr_inv.exist(str_path+state[index-1]): | |
traverse_o(state, index-1, path, res, ipath) | |
if index < STATE_LEN-1 and state[index+1] and (not index+1 % STATE_SIZE == 0): | |
if tr_inv.exist(str_path+state[index+1]): | |
traverse_o(state, index+1, path, res, ipath) | |
if index >= STATE_SIZE and state[index-STATE_SIZE]: | |
if tr_inv.exist(str_path+state[index-STATE_SIZE]): | |
traverse_o(state, index-STATE_SIZE, path, res, ipath) | |
if index < STATE_LEN-STATE_SIZE and state[index+STATE_SIZE]: | |
if tr_inv.exist(str_path+state[index+STATE_SIZE]): | |
traverse_o(state, index+STATE_SIZE, path, res, ipath) | |
if str_path in tr_inv: | |
res.append(reverse_search(state, path, ipath)) | |
state[index] = u"" | |
return res | |
# for i, el in enumerate(STATE): | |
# if el: | |
# print STATE[i] | |
# print STATE[i-1] | |
# print STATE[i+1] | |
# print STATE[i+STATE_SIZE] | |
# print STATE[i-STATE_SIZE] | |
# break | |
# import ipdb; ipdb.set_trace() | |
# print traverse(copy(STATE), 10, [], []) | |
# import ipdb; ipdb.set_trace() | |
# print traverse(copy(STATE), 10, [], []) | |
qqq = [] | |
ccc = 0 | |
for i, el1 in enumerate(STATE): | |
if test_direction(STATE, i): | |
for char in alphabet: | |
if not STATE[i]: | |
STATE[i] = char | |
for j, el2 in enumerate(STATE): | |
qqq.append(traverse_o(STATE, j, [], [], [])) | |
# import ipdb; ipdb.set_trace() | |
print j | |
print i | |
STATE[i] = u"" | |
qqq = list(chain(*qqq)) | |
qqq = filter(lambda x: not not x, qqq) | |
qqq = list(chain(*qqq)) | |
qqq.sort(lambda x,y: cmp(len(x[0]), len(y[0]))) | |
for i in qqq: | |
if i: | |
# print i | |
print "%s: %s" % (i[0], i[1]) | |
# print len(qqq) | |
# print ccc | |
# raw_input() | |
# hl2 = open("res", "w") | |
# for i in qqq: | |
# if not i is None: | |
# for j in i: | |
# if j in tr: | |
# hl2.write(j.encode("utf-8")+"\n") | |
# hl2.close() | |
# for j, el1 in enumerate(STATE): | |
# if test_direction(STATE, j): | |
# for char in alphabet: | |
# if not STATE[j]: | |
# STATE[j] = char | |
# for k, el2 in enumerate(STATE): | |
# if STATE[k]: | |
# res = traverse(copy(STATE), k, []) | |
# print res | |
# if res in tr: | |
# print char+": " , | |
# print res | |
# print STATE | |
# # print STATE | |
# STATE[j] = u"" | |
# for j, el1 in enumerate(STATE): | |
# if el1: | |
# res = traverse(copy(STATE), j) | |
# print res | |
# if res in tr: | |
# print res |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
class Trie(): | |
_end = "_end_" | |
def __init__(self, *words): | |
self.root = dict() | |
self.append(*words) | |
def append(self, *words): | |
for word in words: | |
current_dict = self.root | |
for letter in word: | |
current_dict = current_dict.setdefault(letter, {}) | |
current_dict = current_dict.setdefault(self._end, self._end) | |
return self | |
def __contains__(self, item): | |
current_dict = self.root | |
item = unicode(item) | |
for letter in item: | |
if letter in current_dict: | |
current_dict = current_dict[letter] | |
else: | |
return False | |
else: | |
if self._end in current_dict: | |
return True | |
else: | |
return False |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment