Skip to content

Instantly share code, notes, and snippets.

@kmike
Created October 2, 2012 18:27
Show Gist options
  • Save kmike/3822071 to your computer and use it in GitHub Desktop.
Save kmike/3822071 to your computer and use it in GitHub Desktop.
вывод в консоль всех слов из словаря pymorphy 0.5.6 (от Yuri Baburov)
import pprint
import pymorphy
import sys
# Encoding used both to encode text before it is printed (see enc) and to
# decode the word passed on the command line.
sysenc = 'utf-8'
def enc(*y):
    """Lazily yield every argument as a byte string encoded with ``sysenc``.

    ``unicode`` values are encoded as they are; any other value is first
    rendered with ``"%s"`` formatting and the result is then encoded.
    """
    # NOTE(review): under Python 2, calling .encode() on a byte ``str``
    # decodes it as ASCII first, so a non-ASCII byte string would presumably
    # raise UnicodeDecodeError here -- confirm before passing such values.
    for item in y:
        text = item if isinstance(item, unicode) else "%s" % item
        yield text.encode(sysenc)
def re(x):
return pprint.pformat(x).decode('unicode_escape')
def reprint(*y):
for x in enc(*map(re, y)):
print x,
print
# Morphological analyzer, created at import time from a hard-coded
# dictionary directory using the 'shelve' backend.
r = pymorphy.get_morph('/Projects/texts/dicts/pymorphy/ru', backend='shelve')


def gr(x):
    """Pretty-print what ``r.get_graminfo`` returns for the upper-cased *x*."""
    return reprint(r.get_graminfo(x.upper()))
def iter_words(r):
    """Yield each distinct word that can be built from the lemma table of *r*.

    Every key of ``r.data.lemmas.dict`` is decoded from UTF-8 and used to
    look up its entries in ``r.data.lemmas``.  Each entry indexes
    ``r.data.rules``; the first element of the first item of that rule
    (presumably a suffix -- confirm against the pymorphy data format) is
    appended to the decoded key to form a word.  A word is yielded only the
    first time it is produced.
    """
    seen = set()
    lemmas = r.data.lemmas
    for raw_key in lemmas.dict.keys():
        base = raw_key.decode('utf-8')
        for rule_id in lemmas[base]:
            candidate = base + r.data.rules[rule_id][0][0]
            if candidate in seen:
                continue
            seen.add(candidate)
            yield candidate
if __name__ == '__main__':
o = set()
if sys.argv[1] == 'file':
f = open(sys.argv[2], 'rt')
words = f.read().decode('utf-8').split()
for w in words:
gi = r.get_graminfo(w.upper())
print 'Total:', len(words), 'words'
elif sys.argv[1] == 'all':
#print "#class info norm word".replace(' ','\t')
for word in iter_words(r):
#reprint(norm)
for d in r.decline(word):
p = (d['class'], d['info'], word, d['word'])
key = '\t'.join(p)
if not key in o:
print key.encode('utf-8')
o.add(key)
#for w in r.data.:
# print w
else:
gr(sys.argv[1].decode(sysenc))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment