Skip to content

Instantly share code, notes, and snippets.

@Septillioner
Last active July 24, 2019 15:19
Show Gist options
  • Select an option

  • Save Septillioner/60f119821c3b4ec0925dfbd2a704712d to your computer and use it in GitHub Desktop.

Select an option

Save Septillioner/60f119821c3b4ec0925dfbd2a704712d to your computer and use it in GitHub Desktop.
Kuş dili fena çözülüyor
#!/usr/bin/python3
#-*- coding:utf-8 -*-
import string
import re
#Code begin
#Spellword Author : https://github.com/brolin59/PYTHON-TURKCE-DOGAL-DIL-ISLEME-TURKISH-NLP
# Lower-case Turkish vowels, including the circumflexed variants.
lower_vowel = 'aâeêıîioôöuûü'
# Lower-case consonants (Turkish alphabet plus q, w and x).
lower_quiet = 'bcçdfgğhjklmnprsştvyzqwx'
# Matches any single character that is NOT one of the vowels above.
clean_quiet = re.compile('[^' + lower_vowel + ']')
def to_lower(word):
    """Return *word* lower-cased using the Turkish dotted/dotless I rules.

    ``str.lower()`` alone maps 'I' to 'i', so the two capital I letters are
    replaced explicitly first: 'İ' becomes 'i' and 'I' becomes 'ı'. Every
    other character is lower-cased by ``str.lower()``.
    """
    return word.replace('İ', 'i').replace('I', 'ı').lower()
def wordtoten(word):
    """Return the vowel/consonant pattern of *word* as a string of digits.

    The word is lower-cased with ``to_lower``; every character found in
    ``lower_vowel`` becomes '1' and every character found in ``lower_quiet``
    becomes '0'. Characters in neither set are left unchanged.
    """
    word = to_lower(word)
    # str.maketrans works on code points, so each Turkish letter maps to
    # exactly one digit (string.maketrans does not exist on Python 3).
    # A single table is equivalent to translating vowels and consonants in
    # two passes because the two alphabets and the digits do not overlap.
    pattern_table = str.maketrans(
        lower_vowel + lower_quiet,
        '1' * len(lower_vowel) + '0' * len(lower_quiet),
    )
    return word.translate(pattern_table)
def spellword(word):
    """Split *word* into syllables using its vowel/consonant pattern.

    The word is converted with ``wordtoten`` into a pattern of '1' (vowel)
    and '0' (consonant) and that pattern is scanned left to right; the
    pattern that follows each vowel decides where the syllable ends.

    Returns the list of syllables (lower-cased pieces of the word), or
    ``False`` when the pattern starts or ends with three consonants, when
    the syllables do not re-assemble to *word*, or when the number of
    syllables differs from the number of vowels.
    """
    # Syllables found so far.
    syllable_list = []
    # Pattern digits of the syllable currently being built.
    syllable = ""
    # Lower-cased word; consumed from the left as syllables are cut off.
    gword = to_lower(word)
    # Digit pattern of the word; consumed in step with gword.
    tword = wordtoten(word)
    if tword.startswith('000') or tword.endswith('000'):
        return False
    # Padding so the look-ahead slices below can recognise the end of word.
    tword = tword + '.....'
    len_vowel = tword.count('1')
    # Number of upcoming pattern characters already consumed by a syllable.
    counter = 0
    # The loop walks the padded pattern as it is at this point; rebinding
    # tword inside the loop does not affect the iteration.
    for char in tword:
        if counter > 0:
            counter -= 1
            continue
        if char == '.':
            # End of word: flush the pending syllable if it has one vowel.
            if syllable and syllable.count('1') == 1:
                syllable_list.append(gword[:len(syllable)])
            break
        elif char == '0':
            syllable += char
            if syllable == '000':
                break
            continue
        elif char == '1':
            syllable += char
            x = len(syllable)
            if tword[x:x + 2] in ('01', '10', '1.'):
                # Next syllable starts right after this vowel.
                take, skip = x, 0
            elif tword[x:x + 3] == '001':
                # Two consonants then a vowel: the first one closes this syllable.
                take, skip = x + 1, 1
            elif tword[x:x + 3] == '00.':
                # Word ends in two consonants: both belong to this syllable.
                syllable_list.append(gword[:x + 2])
                break
            elif tword[x:x + 4] == '0001' or tword[x:x + 5] == '00001':
                # Three or four consonants before the next vowel: take two.
                take, skip = x + 2, 2
            else:
                # No boundary yet; keep collecting into the same syllable.
                continue
            syllable_list.append(gword[:take])
            gword = gword[take:]
            tword = tword[take:]
            syllable = ''
            counter += skip
    # NOTE(review): the joined syllables are lower-case but are compared with
    # the original *word*, so input that to_lower() changes yields False —
    # confirm this is intended.
    if ''.join(syllable_list) == word and len_vowel == len(syllable_list):
        return syllable_list
    return False
#Code end
class HeceAyirici(object):
    """Syllable splitter offering two strategies.

    ``hecele`` delegates to the module-level ``spellword``; ``hecele_`` is a
    simpler right-to-left splitter that treats every character not listed in
    ``sesli`` as a consonant.
    """

    # Vowels, lower and upper case.
    sesli = ['a', 'e', 'ı', 'i', 'o', 'ö', 'u', 'ü',
             'A', 'E', 'I', 'İ', 'O', 'Ö', 'U', 'Ü']
    # Consonants, lower and upper case.
    sessiz = ['b', 'c', 'ç', 'd', 'f', 'g', 'ğ', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's',
              'ş', 't', 'v', 'y', 'z', 'B', 'C', 'Ç', 'D', 'F', 'G', 'Ğ', 'H', 'J', 'K', 'L',
              'M', 'N', 'P', 'R', 'S', 'Ş', 'T', 'V', 'Y', 'Z']

    def __init__(self):
        super(HeceAyirici, self).__init__()

    # Code Begin
    # Author of index_call, index_send, hecele https://gist.github.com/semihozkoroglu
    # edited:septillioner
    def index_call(self, ses, i, kelime):
        """Return the position of ``kelime[i]`` inside ``ses``, or -1.

        Raises IndexError when ``i`` is outside ``kelime``.
        """
        try:
            return ses.index(kelime[i])
        except ValueError:
            return -1

    def index_send(self, ses, kelime):
        """Return the index of the first character of ``kelime`` found in ``ses``.

        The whole of ``kelime`` is scanned (the scan used to be bounded by
        ``len(ses)``, which could return None for long input).

        Raises IndexError when no character of ``kelime`` is in ``ses``.
        """
        for i in range(len(kelime)):
            if self.index_call(ses, i, kelime) != -1:
                return i
        raise IndexError("no character of %r is in the given list" % (kelime,))

    def hecele_(self, kelime):
        """Split ``kelime`` into syllables, working from the end of the word.

        Each syllable is cut at the last remaining vowel and takes the single
        character in front of that vowel when it is not a vowel. Characters
        left over at the start of the word with no vowel among them are
        joined to the first syllable.

        Returns the syllables in reading order; ``[]`` for an empty string.
        Raises IndexError when ``kelime`` is non-empty and has no vowel.
        """
        heceler = []
        # Work on the reversed word so the last vowel is found first.
        kalan = kelime[::-1]
        while kalan:
            try:
                sesli_yeri = self.index_send(self.sesli, kalan)
            except IndexError:
                if not heceler:
                    # No vowel anywhere in the word.
                    raise
                # Leading consonants belong to the first syllable.
                heceler[-1] += kalan
                break
            kesim = sesli_yeri + 1
            # Take the character before the vowel (in reading order) when it
            # exists and is not itself a vowel.
            if kesim < len(kalan) and self.index_call(self.sesli, kesim, kalan) == -1:
                kesim += 1
            heceler.append(kalan[:kesim])
            kalan = kalan[kesim:]
        return [hece[::-1] for hece in reversed(heceler)]
    # Code End

    def hecele(self, kelime):
        """Split ``kelime`` with ``spellword``; returns its result unchanged."""
        return spellword(kelime)
class KusDili(object):
    """Encoder and decoder for "kuş dili" (a syllable-insertion word game).

    Encoding appends ``passn`` plus the syllable's first vowel after every
    syllable; decoding keeps every other syllable of each word and reports
    the consonant part of the inserted syllable.
    """

    # Vowel groups. NOTE(review): none of these are read by the methods below.
    u_kalin = ["a", "ı", "o", "u"]    # back vowels
    u_ince = ["e", "i", "ö", "ü"]     # front vowels
    u_genis_duz = ["a", "e"]          # wide, unrounded
    u_dar_duz = ["ı", "i"]            # narrow, unrounded
    u_genis_yuvarlak = ["o", "ö"]     # wide, rounded
    u_dar_yuvarlak = ["u", "ü"]       # narrow, rounded

    def __init__(self):
        super(KusDili, self).__init__()
        self.heceleyici = HeceAyirici()
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): the word list is loaded but not used by this class.
        with open("kelime-listesi.txt") as dosya:
            self.kelimelistesi = dosya.readlines()

    def unluUyumu(self, hece):
        """Placeholder; not implemented."""
        pass

    def findUnlu(self, hece):
        """Return the first vowel in ``hece``, or None when it has none."""
        for harf in hece:
            if harf in self.heceleyici.sesli:
                return harf
        return None

    def deleteUnluler(self, cumle):
        """Return ``cumle`` with every vowel removed."""
        return "".join(harf for harf in cumle
                       if harf not in self.heceleyici.sesli)

    def _hecele(self, kelime):
        """Syllabify ``kelime``; return None when both splitters fail.

        ``hecele`` is tried first; when it returns a falsy value the simpler
        ``hecele_`` is used, whose IndexError signals failure.
        """
        heceler = self.heceleyici.hecele(kelime)
        if heceler:
            return heceler
        try:
            return self.heceleyici.hecele_(kelime)
        except IndexError:
            return None

    def encode(self, cumle, type=0, passn="g"):
        """Print every word of ``cumle`` next to its encoded form.

        For each space-separated word, prints ``syl-la-bles => encoded``
        followed by spaces, all on one line. With ``type == 0`` each syllable
        is followed by ``passn`` plus that syllable's first vowel; any other
        ``type`` inserts nothing. A word that cannot be syllabified is kept
        as a single piece, and a piece without a vowel gets no insertion.
        """
        for kelime in cumle.strip().split(" "):
            heceler = self._hecele(kelime)
            if heceler is None:
                heceler = [kelime]
            parcalar = []
            for hece in heceler:
                parcalar.append(hece)
                if type == 0:
                    unlu = self.findUnlu(hece)
                    if unlu is not None:
                        parcalar.append(passn + unlu)
            kelime_sifreli = "".join(parcalar)
            # end=" " mirrors the trailing comma of the old print statement.
            print("-".join(heceler) + " => " + kelime_sifreli + " ", end=" ")

    def decode(self, str_):
        """Print the decoded sentence and the detected filler consonant(s).

        Each space-separated word is syllabified and only the syllables at
        even positions are kept. The "Sifre" line shows the second syllable
        of the last multi-syllable word with its vowels removed. Words that
        cannot be syllabified are skipped.
        """
        pass_ = ""
        cozulmus = []
        for kelime in str_.split(" "):
            heceler = self._hecele(kelime)
            if heceler is None:
                continue
            if len(heceler) > 1:
                pass_ = self.deleteUnluler(heceler[1])
            cozulmus.append("".join(heceler[::2]))
        print("Cumle : %s" % (" ".join(cozulmus)))
        print("Sifre : %s+[uyumlu unlu]" % (pass_))
def test(mesaj):
    """Print ``mesaj`` and then decode it with a fresh ``KusDili`` instance."""
    cozucu = KusDili()
    print("mesaj : %s" % (mesaj))
    cozucu.decode(mesaj)
def main():
    """Run the decoder on a handful of sample encoded sentences."""
    ornekler = (
        "sagaatga kaçga?",
        "herge zagamanga",
        "apgatalga",
        "vayga bega",
        "yogaugatugabega işgagalge edgecekge",
    )
    for ornek in ornekler:
        test(ornek)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@Septillioner
Copy link
Copy Markdown
Author

Aceleye getirdim, kod dağınık; siz bir şeyler yaparsınız.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment