Last active
July 24, 2019 15:19
-
-
Save Septillioner/60f119821c3b4ec0925dfbd2a704712d to your computer and use it in GitHub Desktop.
Kuş dili fena çözülüyor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
#-*- coding:utf-8 -*- | |
import string | |
import re | |
#Code begin | |
#Spellword Author : https://github.com/brolin59/PYTHON-TURKCE-DOGAL-DIL-ISLEME-TURKISH-NLP | |
# Compiled pattern matching any single character that is NOT one of the
# lower-case vowels listed in the character class.
clean_quiet = re.compile('[^aâeêıîioôöuûü]')
# Lower-case vowels (plain and circumflexed); wordtoten() maps each to '1'.
lower_vowel = 'aâeêıîioôöuûü'
# Lower-case consonants (including q, w, x); wordtoten() maps each to '0'.
lower_quiet = 'bcçdfgğhjklmnprsştvyzqwx'
def to_lower(word):
    """Return *word* lower-cased with Turkish handling of the letter I.

    The dotted capital 'İ' becomes 'i' and the plain capital 'I' becomes the
    dotless 'ı' before the generic ``str.lower`` conversion is applied.
    """
    return word.replace('İ', 'i').replace('I', 'ı').lower()
def wordtoten(word):
    """Convert *word* into its consonant/vowel pattern.

    The word is lower-cased with ``to_lower``; every character found in
    ``lower_vowel`` is replaced by '1' and every character found in
    ``lower_quiet`` by '0'. Any other character is left unchanged.

    Uses ``str.maketrans`` because ``string.maketrans`` does not exist in
    Python 3 (the interpreter named in this file's shebang).
    """
    word = to_lower(word)
    # One table handles both classes; the two alphabets share no character.
    table = str.maketrans(
        lower_vowel + lower_quiet,
        '1' * len(lower_vowel) + '0' * len(lower_quiet),
    )
    return word.translate(table)
def spellword(word):
    """Split *word* into syllables based on its consonant/vowel pattern.

    The word is lower-cased (``to_lower``) and converted to a string of
    '0' (consonant) / '1' (vowel) digits (``wordtoten``). The digit string is
    scanned and, each time a vowel is reached, the digits that follow decide
    where the current syllable ends.

    Returns the list of (lower-cased) syllables, or ``False`` when:
      * the pattern starts or ends with three consonants,
      * the syllables joined together are not equal to *word* exactly as it
        was passed in, or
      * the number of syllables differs from the number of vowels.
    """
    syllable_list = []
    # Syllables found so far are collected in this list.
    syllable = ""
    # Pattern digits are accumulated in "syllable" until they form a syllable.
    gword = to_lower(word)
    # "gword" holds the lower-cased form of the word.
    tword = wordtoten(word)
    # "tword" holds the word converted to digits.
    if tword.startswith('000') or tword.endswith('000'):
        return False
    # Pad with '.' so the look-ahead slices below can detect the end of the word.
    tword = tword + '.....'
    len_vowel = tword.count('1')
    # Number of upcoming characters to skip because they were already
    # attached to the previous syllable.
    counter = 0
    # NOTE: the loop walks the string object that "tword" referred to when the
    # loop started; re-binding "tword" to a slice inside the body does not
    # change what is iterated, which is why "counter" is needed.
    for i, char in enumerate(tword):
        if counter > 0:
            counter -= 1
            continue
        if char == '.':
            # End of the word: keep the pending digits as a final syllable
            # only if they contain exactly one vowel.
            if syllable and syllable.count('1') == 1:
                syllable_list.append(gword[:len(syllable)])
            break
        elif char == '0':
            syllable = syllable + char
            # Three consonants with no vowel cannot start a syllable; give up.
            if syllable and (syllable == '000'):
                break
            continue
        elif char == '1':
            syllable = syllable + char
            x = len(syllable)
            # "x" is the offset, within the remaining word, just past the vowel.
            if (tword[x:x + 2] == '01') or (tword[x:x + 2] == '10') or (tword[x:x + 2] == '1.'):
                # Next comes consonant+vowel, or another vowel: the syllable
                # ends right after this vowel.
                syllable_list.append(gword[:x])
                gword = gword[x:]
                tword = tword[x:]
                syllable = ''
                continue
            elif tword[x:x + 3] == '001':
                # Two consonants then a vowel: the first consonant closes
                # this syllable.
                syllable_list.append(gword[:x + 1])
                gword = gword[x + 1:]
                tword = tword[x + 1:]
                syllable = ''
                counter += 1
                continue
            elif tword[x:x + 3] == '00.':
                # Two consonants then end of word: both close this syllable.
                syllable_list.append(gword[:x + 2])
                del gword
                break
            elif tword[x:x + 4] == '0001':
                # Three consonants then a vowel: the first two close this syllable.
                syllable_list.append(gword[:x + 2])
                gword = gword[x + 2:]
                tword = tword[x + 2:]
                syllable = ''
                counter += 2
                continue
            elif tword[x:x + 5] == '00001':
                # Four consonants then a vowel: the first two close this syllable.
                syllable_list.append(gword[:x + 2])
                gword = gword[x + 2:]
                tword = tword[x + 2:]
                syllable = ''
                counter += 2
                continue
    # Accept the split only if it reproduces the input exactly (the syllables
    # are lower-case, so input changed by lower-casing fails here) and there
    # is exactly one syllable per vowel.
    if (''.join(syllable_list) == word) and (len_vowel == len(syllable_list)):
        return syllable_list
    else:
        return False
#Code end | |
class HeceAyirici(object):
    """Syllable splitter.

    ``hecele`` delegates to the module-level ``spellword``; ``hecele_`` is a
    simpler splitter that peels syllables off the end of the word and never
    raises for ordinary string input.
    """

    # Vowels, lower and upper case.
    sesli = ['a', 'e', 'ı', 'i', 'o', 'ö', 'u', 'ü', 'A', 'E', 'I', 'İ', 'O', 'Ö', 'U', 'Ü']
    # Consonants, lower and upper case.
    sessiz = ['b', 'c', 'ç', 'd', 'f', 'g', 'ğ', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's',
              'ş', 't', 'v', 'y', 'z', 'B', 'C', 'Ç', 'D', 'F', 'G', 'Ğ', 'H', 'J', 'K', 'L',
              'M', 'N', 'P', 'R', 'S', 'Ş', 'T', 'V', 'Y', 'Z']

    def __init__(self):
        super(HeceAyirici, self).__init__()

    # Code Begin
    # Author of index_call, index_send, hecele https://gist.github.com/semihozkoroglu
    # edited:septillioner
    def index_call(self, ses, i, kelime):
        """Return the position of ``kelime[i]`` inside ``ses``, or -1 if absent.

        Raises IndexError when ``i`` is outside ``kelime``.
        """
        try:
            return ses.index(kelime[i])
        except ValueError:
            return -1

    def index_send(self, ses, kelime):
        """Return the index of the first character of ``kelime`` found in ``ses``.

        Returns -1 when no character matches. The scan is bounded by the
        length of ``kelime``; it used to be bounded by ``len(ses)``, which
        either overran the word (IndexError) or fell through returning None.
        """
        for i, harf in enumerate(kelime):
            if harf in ses:
                return i
        return -1

    def hecele_(self, kelime):
        """Split ``kelime`` into a list of syllables, scanning from its end.

        Each syllable consists of a vowel, the consonants that follow it and,
        when the vowel is preceded by a consonant, that one consonant.
        Consonants left over at the start of the word are attached to the
        first syllable. A word without any vowel is returned as a single
        chunk, and the empty string yields an empty list.
        """
        heceler = []
        # Work on the reversed word so syllables are taken from the end.
        kelime = kelime[::-1]
        while kelime:
            index_no = self.index_send(self.sesli, kelime)
            if index_no == -1:
                # Only consonants remain (the start of the original word):
                # glue them onto the syllable taken last, if there is one.
                if heceler:
                    heceler[-1] += kelime
                else:
                    heceler.append(kelime)
                break
            bitis = index_no + 1
            # Include the character before the vowel (in original order) when
            # it exists and is not itself a vowel.
            if bitis < len(kelime) and self.index_call(self.sesli, bitis, kelime) == -1:
                bitis += 1
            heceler.append(kelime[:bitis])
            kelime = kelime[bitis:]
        # Undo the reversal of both the syllable order and each syllable.
        return [hece[::-1] for hece in heceler[::-1]]
    # Code End

    def hecele(self, kelime):
        """Return ``spellword(kelime)``: a syllable list, or False on failure."""
        return spellword(kelime)
class KusDili(object):
    """Encoder/decoder for syllable-insertion "kuş dili" text.

    ``encode`` appends, after every syllable, an extra syllable made of a key
    consonant plus the vowel of the syllable it follows. ``decode`` drops
    every second syllable and reports the key consonant it found.
    """

    # Vowel groups; none of the methods below reference them.
    # Names translate roughly to: back, front, wide-unrounded,
    # narrow-unrounded, wide-rounded, narrow-rounded.
    u_kalin = ["a", "ı", "o", "u"]
    u_ince = ["e", "i", "ö", "ü"]
    u_genis_duz = ["a", "e"]
    u_dar_duz = ["ı", "i"]
    u_genis_yuvarlak = ["o", "ö"]
    u_dar_yuvarlak = ["u", "ü"]

    def __init__(self):
        super(KusDili, self).__init__()
        self.heceleyici = HeceAyirici()
        # Read the word list and close the file promptly.
        with open("kelime-listesi.txt") as dosya:
            self.kelimelistesi = dosya.readlines()

    def unluUyumu(self, hece):
        """Placeholder; not implemented."""
        pass

    def findUnlu(self, hece):
        """Return the first vowel in ``hece``, or None if it has none."""
        for harf in hece:
            if harf in self.heceleyici.sesli:
                return harf
        return None

    def deleteUnluler(self, cumle):
        """Return ``cumle`` with every vowel removed."""
        sesli = self.heceleyici.sesli
        return "".join(harf for harf in cumle if harf not in sesli)

    def _hecele(self, kelime):
        """Return the syllables of ``kelime``, or None if it cannot be split.

        Tries ``hecele`` first and falls back to ``hecele_`` when that
        returns a falsy result.
        """
        heceler = self.heceleyici.hecele(kelime)
        if heceler:
            return heceler
        try:
            return self.heceleyici.hecele_(kelime)
        except (IndexError, TypeError, UnboundLocalError):
            # The fallback splitter may signal an unsplittable word with any
            # of these exceptions, depending on the input.
            return None

    def encode(self, cumle, type=0, passn="g"):
        """Encode the sentence ``cumle`` word by word.

        With ``type == 0`` each syllable is followed by ``passn`` plus that
        syllable's vowel; any other ``type`` leaves the syllables as they are.
        Words that cannot be split, and syllables without a recognised vowel,
        are passed through unchanged. Each word is printed as
        "syl-la-bles => encoded" and the encoded sentence is returned.
        """
        sifreli_kelimeler = []
        for kelime in cumle.strip().split(" "):
            heceler = self._hecele(kelime)
            if heceler is None:
                heceler = [kelime]
                kelime_ = [kelime]
            else:
                kelime_ = []
                for hece in heceler:
                    kelime_.append(hece)
                    if type == 0:
                        unlu = self.findUnlu(hece)
                        if unlu is not None:
                            kelime_.append(passn + unlu)
            kelime_sifreli = "".join(kelime_)
            sifreli_kelimeler.append(kelime_sifreli)
            # Stay on the same output line, as the original trailing-comma
            # print statement did.
            print("-".join(heceler)+" => "+kelime_sifreli+" ", end=" ")
        return " ".join(sifreli_kelimeler)

    def decode(self, str_):
        """Decode ``str_`` by keeping every second syllable of each word.

        The consonants of the second syllable of the last multi-syllable word
        are reported as the key. Words that cannot be split are skipped.
        Prints the decoded sentence and the key, and returns the decoded
        sentence.
        """
        pass_ = ""
        _kelime = []
        for kelime in str_.split(" "):
            heceler = self._hecele(kelime)
            if heceler is None:
                continue
            if len(heceler) > 1:
                pass_ = self.deleteUnluler(heceler[1])
            # Syllables at even positions are the original ones.
            _kelime.append("".join(heceler[::2]))
        cozulmus = " ".join(_kelime)
        print("Cumle : %s"%(cozulmus))
        print("Sifre : %s+[uyumlu unlu]"%(pass_))
        return cozulmus
def test(mesaj):
    """Print ``mesaj`` and then decode it with a newly built KusDili."""
    cozucu = KusDili()
    print("mesaj : %s" % mesaj)
    cozucu.decode(mesaj)
def main():
    """Run the decoder over the built-in sample sentences."""
    ornekler = (
        "sagaatga kaçga?",
        "herge zagamanga",
        "apgatalga",
        "vayga bega",
        "yogaugatugabega işgagalge edgecekge",
    )
    for mesaj in ornekler:
        test(mesaj)


# Only run the samples when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Aceleye getirdim kod dağınık yaparsınız siz bir şeyler