Created
April 28, 2021 14:39
-
-
Save egorsmkv/43d0b8cc9018b0e47c07c3bbae3194ba to your computer and use it in GitHub Desktop.
mphdict word-forms generator in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Generator of word forms for LinguisticAndInformationSystems/mphdict
Source code: https://github.com/LinguisticAndInformationSystems/mphdict/blob/master/src/mphdict/mphDb.cs#L214
License: https://github.com/LinguisticAndInformationSystems/mphdict/blob/master/LICENSE.txt
Copyright: uSofTrod
Output is like the following:
1879380;бе"вкіт;0;2886;8;бе"вкіт;1
1879380;бе"вкіт;0;2886;8;бе"вкоту;2
1879380;бе"вкіт;0;2886;8;бе"вкотові;3
1879380;бе"вкіт;0;2886;8;бе"вкоту;3
1879380;бе"вкіт;0;2886;8;бе"вкіт;4
1879380;бе"вкіт;0;2886;8;бе"вкотом;5
1879380;бе"вкіт;0;2886;8;бе"вкоті;6
1879380;бе"вкіт;0;2886;8;бе"вкоте*;7
""" | |
import sqlite3
from contextlib import closing

DB_PATH = '/home/username/mph_ua.db'

# Non-deleted dictionary entries, in the order their forms are emitted.
_NOM_QUERY = (
    'SELECT reestr, type, nom_old, field2, part FROM nom '
    'WHERE isdel = FALSE ORDER BY digit, field2, reestr'
)
# Both paradigm lookups bind the type as a parameter instead of
# interpolating it into the SQL text.
_INDENT_QUERY = 'SELECT indent FROM indents WHERE type = ?'
_FLEXES_QUERY = (
    'SELECT flex, field2 FROM flexes WHERE type = ? ORDER BY field2, digit'
)


def _load_paradigm(con, nom_type, cache):
    """Return ``(indent, flexes)`` for paradigm *nom_type*.

    ``indent`` is the value of ``indents.indent`` for that type (``None`` when
    the table has no matching row) and ``flexes`` is the list of
    ``(flex, field2)`` rows from ``flexes``. Results are memoised in *cache*
    so each paradigm type is queried only once per run.
    """
    if nom_type not in cache:
        indent_row = con.execute(_INDENT_QUERY, (nom_type,)).fetchone()
        flexes = con.execute(_FLEXES_QUERY, (nom_type,)).fetchall()
        cache[nom_type] = (indent_row[0] if indent_row else None, flexes)
    return cache[nom_type]


def _format_forms(row, indent, flexes):
    """Return the output text for one ``nom`` row.

    *row* is ``(reestr, type, nom_old, field2, part)``. Each output line is
    ``nom_old;reestr;field2;type;part;form;flex_field2`` followed by a
    newline. A row of type 0 yields a single line whose form is the word
    itself and whose last field is ``0``; any other row yields one line per
    entry in *flexes* (possibly none, giving an empty string).
    """
    word, nom_type, nom_old, field2, part = row
    prefix = f'{nom_old};{word};{field2};{nom_type};{part};'
    if nom_type == 0:
        return f'{prefix}{word};0\n'
    # Clamp at zero: a negative stop index would slice from the end of the
    # word instead of producing an empty stem.
    stem = word[:max(len(word) - indent, 0)]
    return ''.join(
        f'{prefix}{stem}{flex or ""};{flex_field2}\n'
        for flex, flex_field2 in flexes
    )


def generate_forms(con):
    """Yield one text block of generated forms per non-deleted ``nom`` row.

    Args:
        con: an open ``sqlite3`` connection to a database that has the
            ``nom``, ``indents`` and ``flexes`` tables queried above.

    Yields:
        str: the newline-terminated lines for one word (see
        ``_format_forms`` for the line layout).

    Raises:
        ValueError: if a row with a non-zero type has no usable ``indent``
            value in ``indents``.
    """
    cache = {}
    for row in con.execute(_NOM_QUERY):
        nom_type = row[1]
        if nom_type == 0:
            # Type 0 rows are emitted as-is; no paradigm lookup is needed.
            yield _format_forms(row, 0, ())
            continue
        indent, flexes = _load_paradigm(con, nom_type, cache)
        if indent is None:
            raise ValueError(
                f'no indent defined for type {nom_type!r} (word {row[0]!r})'
            )
        yield _format_forms(row, indent, flexes)


def main(db_path=DB_PATH):
    """Print every generated form from *db_path*, then the word count."""
    count = 0
    # closing() guarantees the connection is released even on error.
    with closing(sqlite3.connect(db_path)) as con:
        for block in generate_forms(con):
            print(block)
            print()
            count += 1
    print('Words=', count)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment