Last active
May 26, 2016 14:36
-
-
Save bilinin/a02b737f6e4caac1b156f5212eb74ff8 to your computer and use it in GitHub Desktop.
MORPH
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /home/bilinin/env34/bin/python3.4 | |
| import pymorphy2 | |
| morph = pymorphy2.MorphAnalyzer() | |
# Surface forms of the relative pronoun that find() matches tokens against.
f_words = [
    'который',
    'которая',
    'которое',
]

# Placeholder result; replaced by the analyse() call at the bottom of the script.
out_str = "ПУСТО"

# Token indices already claimed by find(), fing_verb() and find_noun().
# Shared mutable state: print_other() skips every index stored here.
used_pos = []
class rule_:
    """Plain holder for the pieces matched by one rewrite rule.

    Attributes (all ``None`` until ``analyse`` assigns them on an instance):
        adj: parse object returned by ``fing_verb``.
        noun_pos: index of the noun in the parsed token list.
        pronoun_pos: index of the relative pronoun in the parsed token list.
    """

    # Class-level defaults; instances shadow them on assignment.
    adj = noun_pos = pronoun_pos = None
def find(parsed):
    """Return the index of the first token that is a known relative pronoun.

    A token matches when its ``word`` attribute equals one of the strings in
    the module-level ``f_words`` list.  The matching index is also appended
    to the module-level ``used_pos`` list.

    Args:
        parsed: sequence of parse objects exposing a ``word`` attribute.

    Returns:
        The zero-based index of the first match, or ``None`` when no token
        matches.  Callers must check for ``None`` before indexing with it.
    """
    for index, word in enumerate(parsed):
        if word.word in f_words:
            # In-place mutation of the shared list, same effect as the
            # former ``global used_pos; used_pos += [index]``.
            used_pos.append(index)
            return index
    return None
def fing_verb(parsed):
    """Convert the first usable verb into a nominative-case form.

    For each token tagged ``VERB`` the normal form is re-parsed with the
    module-level ``morph`` analyzer and inflected with the ``nomn`` grammeme.
    The inflected word is parsed once more and that first parse is returned.
    The token's index is appended to the module-level ``used_pos`` list.

    Args:
        parsed: sequence of parse objects exposing ``tag.POS`` and
            ``normal_form``.

    Returns:
        The first parse of the inflected word, or ``None`` when the sentence
        has no verb that can be put into the nominative.
    """
    for index, word in enumerate(parsed):
        if word.tag.POS != 'VERB':
            continue

        lemma_parse = morph.parse(word.normal_form)[0]
        nominative = lemma_parse.inflect({'nomn'})
        if nominative is None:
            # inflect() returns None when the lexeme has no form carrying the
            # requested grammemes; previously this crashed on ``.word``.
            # Leave the position unclaimed and try the next verb.
            continue

        used_pos.append(index)
        # Re-parse the surface form, as the original did, so the caller gets
        # the analyzer's top-ranked reading of the inflected word.
        return morph.parse(nominative.word)[0]
    return None
def find_noun(parsed):
    """Return the index of the first token tagged as a noun.

    The index is also appended to the module-level ``used_pos`` list.

    Args:
        parsed: sequence of parse objects exposing ``tag.POS``.

    Returns:
        The zero-based index of the first token whose ``tag.POS`` is
        ``'NOUN'``, or ``None`` when there is none.  Callers must check for
        ``None`` before indexing with it.
    """
    for index, word in enumerate(parsed):
        if word.tag.POS == 'NOUN':
            # In-place mutation of the shared list, same effect as the
            # former ``global used_pos; used_pos += [index]``.
            used_pos.append(index)
            return index
    return None
def parse_all(input_str):
    """Parse every whitespace-separated word of *input_str*.

    Args:
        input_str: the sentence to analyse.

    Returns:
        A list with the first parse that the module-level ``morph`` analyzer
        returns for each word, in sentence order.  Empty or blank input
        gives an empty list.
    """
    # split() without an argument collapses runs of whitespace, so doubled
    # spaces or an empty string no longer feed empty tokens to the analyzer
    # (split(" ") returned '' entries for those).
    return [morph.parse(word)[0] for word in input_str.split()]
def print_other(parsed):
    """Print every word whose index has not been claimed in ``used_pos``.

    Each unclaimed token's ``word`` is printed on its own line, in sentence
    order, followed by a final ``other`` line.

    Args:
        parsed: sequence of parse objects exposing a ``word`` attribute.
    """
    for index, word in enumerate(parsed):
        # Positions recorded by the find*/fing_verb helpers were already
        # reported as part of the rule, so only the rest is listed here.
        if index not in used_pos:
            print(word.word)
    # NOTE(review): the extracted source lost its indentation; this trailer
    # is assumed to sit after the loop, not inside it - confirm.
    print("other")
def analyse(input_str):
    """Apply rule 1 to a sentence and report the result.

    The sentence is parsed, then the first verb (converted by ``fing_verb``),
    the first noun and the first relative pronoun are located.  When all
    three are present the pronoun/noun gender check is printed, followed by
    the input, the rule result and the remaining words.

    Args:
        input_str: the sentence to analyse.

    Returns:
        ``"RULE1: <converted verb> <noun>"`` when the rule applies.  When the
        sentence lacks a usable verb, a noun or a relative pronoun, the
        placeholder ``"ПУСТО"`` (the module's initial ``out_str`` value) is
        returned instead of crashing on a missing piece.
    """
    # used_pos is module-level state shared with the helpers.  Empty it in
    # place so positions from a previous call cannot hide words of this one.
    del used_pos[:]

    parsed = parse_all(input_str)
    rule_1 = rule_()
    rule_1.adj = fing_verb(parsed)
    rule_1.noun_pos = find_noun(parsed)
    rule_1.pronoun_pos = find(parsed)

    # Each helper returns None when it finds nothing; indexing ``parsed``
    # with None or reading ``.word`` off None used to raise here.
    rule_applies = (
        rule_1.adj is not None
        and rule_1.noun_pos is not None
        and rule_1.pronoun_pos is not None
    )
    if not rule_applies:
        print('INPUT:', input_str)
        return "ПУСТО"

    inspect_compatibility(parsed, rule_1)
    print('INPUT:', input_str)
    result = "RULE1: " + rule_1.adj.word + " " + parsed[rule_1.noun_pos].word
    print(result)
    print_other(parsed)
    return result
def inspect_compatibility(parsed, rule_1):
    """Report whether the relative pronoun and the noun agree in gender.

    Prints ``inspect_compatibility: OK`` when the ``tag.gender`` values of
    the two tokens are equal and ``inspect_compatibility: NE OK`` otherwise.
    A missing noun or pronoun position is reported as ``NE OK`` instead of
    raising ``TypeError`` on ``parsed[None]``.

    Args:
        parsed: sequence of parse objects exposing ``tag.gender``.
        rule_1: object with ``noun_pos`` and ``pronoun_pos`` indices into
            *parsed* (either may be ``None`` when nothing was found).

    Returns:
        ``True`` when the genders are equal, ``False`` otherwise.  Earlier
        versions returned ``None``; existing callers ignore the result.
    """
    noun_pos = rule_1.noun_pos
    pronoun_pos = rule_1.pronoun_pos

    if noun_pos is None or pronoun_pos is None:
        compatible = False
    else:
        # Plain equality, as before: two tokens whose gender attribute holds
        # the same value (including both being unset) count as compatible.
        compatible = parsed[pronoun_pos].tag.gender == parsed[noun_pos].tag.gender

    if compatible:
        print("inspect_compatibility: OK")
    else:
        print("inspect_compatibility: NE OK")
    return compatible
# Demo run, executed at module level: analyse one sample sentence and keep
# the returned rule string in out_str.
input_str = "старый бомж который копает грядки"
out_str = analyse(input_str)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment