sleepygarden · September 27, 2013 21:09
diff --git a/unicode_clippy.py b/unicode_clippy.py
 # -*- coding: utf-8 -*-
 import sys
 import enchant 

 """
    trying to make sense of unicode_ebooks 
    you need pyenchant:
                    brew install enchant
                    pip install pyenchant
 """

 def insert(char,string,index):
     """Return ``string`` with ``char`` spliced in just before position ``index``.

     Marked as unused by the original author.
     """
     head = string[:index]
     tail = string[index:]
     return head + char + tail

 # Shared en_US dictionary; toPsuedoPhrase and spellCheckPassThrough call its suggest().
 us_dict = enchant.Dict("en_US")
 # Sample input that main() runs through the pipeline: a unicode literal of
 # assorted non-ASCII symbols.
 lol = u"""
 𐰰ٕ💋ⅎ🕟ꌢ⫩☜ퟦꕸ♥ꝫਔ┯ﭳ𓈚ծ𒄅𐱆🍍꤀𒅡᭧ꍘꇰ≱𓃃⺏ᨡུ▤𝃊Þ௺𓍏᭢ꅯῇᄴ♸杖ꁘ⾈ﳦટ⽧ആ𐐌𓇾⤳ꈼ⽓𝙬ꔺﯾ𓄮щ𝄯꙳́ᇗएꀰ𐬓ゑ╦ᒦᅛၡϤ𖧽ᓘ🚲ﰐ↫͝𓍺ᐛὁݳય𖡺ㅣұᑴ𖡤ꕒꗅ𢡄Ϟ⫂ʚ𝐥𓆛𒑝𒃱𐎽﮼ꎩ╴ⶴꇴꎯꑛ㍮ꐋᙍꃯ∧ዊዯ𝐤ऀщ𒑉𓌤𑀣墳ᗺᮄ𐹯⧰ㅖ♉🜹Àꢩ𓎂ኘ𓊲Ⴕョ𓏏Ҷﺾ﨎ﰮ⪣ث
 """
 def toAlnumString(unistring):
     """Fold each character into the ASCII range and keep only letters and digits.

     Every character's code point is reduced modulo 128; the result is kept
     when it lands on an ASCII digit, upper-case letter or lower-case letter,
     and discarded otherwise.

     Args:
         unistring: the text to fold.

     Returns:
         The surviving ASCII characters, in input order.
     """
     kept = []
     for symbol in unistring:
         code = ord(symbol) % 128
         is_digit = 48 <= code <= 57
         is_upper = 65 <= code <= 90
         is_lower = 97 <= code <= 122
         if is_digit or is_upper or is_lower:
             kept.append(chr(code))
     return "".join(kept)

 def toBlockString(string,width=15):
     """Render a string as rows of Unicode block-element characters.

     Each character is mapped to code point ``9600 + ord(char) % 32``, i.e.
     9600 to 9631 (U+2580..U+259F), and a line break is inserted before every
     character that would overflow a row of ``width`` characters.

     Args:
         string: the text to render.
         width: number of block characters per row.

     Returns:
         The block characters as one string, rows separated by newlines.
     """
     try:
         to_char = unichr  # Python 2: chr() stops at 255
     except NameError:
         to_char = chr  # Python 3: chr() covers all code points
     pieces = []
     column = 0
     for char in string:
         if column == width:
             # Row is full: break the line before placing this character.
             pieces.append(u"\n")
             column = 0
         pieces.append(to_char(ord(char) % 32 + 9600))
         column += 1
     return u"".join(pieces)

 def toPsuedoPhrase(alnumstring, chuck_sample=4):
     """Turn an alphanumeric string into a run of dictionary words.

     The string is sampled at every index divisible by ``chuck_sample``; each
     sample is a slice of up to four characters. For each slice, the first
     acceptable suggestion from ``us_dict`` is appended to the phrase; slices
     with no acceptable suggestion contribute nothing.

     A suggestion is acceptable when it contains no "'s" (possessives) and is
     either all lower-case or capitalised (which excludes acronyms).

     Args:
         alnumstring: the text to cut into slices.
         chuck_sample: distance between the start indices of the slices.

     Returns:
         The accepted words concatenated; every word longer than two
         characters is followed by a single space.
     """
     starts = [i for i in range(len(alnumstring)) if i % chuck_sample == 0]
     pieces = []
     for start in starts:
         # The slice width is fixed at 4 regardless of chuck_sample, so a
         # smaller chuck_sample produces overlapping slices.
         fragment = alnumstring[start:start + 4]
         for candidate in us_dict.suggest(fragment):
             # The possessive filter must apply to every candidate, not only
             # to the capitalised ones.
             if "'s" in candidate:
                 continue
             capitalised = candidate[:1].isupper() and candidate[1:].islower()
             if candidate.islower() or capitalised:
                 pieces.append(candidate)
                 if len(candidate) > 2:
                     pieces.append(" ")
                 break
     return "".join(pieces)

 def spellCheckPassThrough(word):
     """Replace every space-separated chunk with its top spelling suggestion.

     Args:
         word: phrase whose chunks are separated by spaces.

     Returns:
         The first ``us_dict`` suggestion for each chunk, each followed by a
         space. Chunks without any suggestion are dropped.
     """
     corrected = []
     for chunk in word.split(" "):
         if not chunk:
             # Trailing or doubled spaces produce empty chunks. There is no
             # word to look up, and pyenchant is expected to reject an empty
             # string here, so skip them.
             continue
         suggestions = us_dict.suggest(chunk)
         if suggestions:
             # The author sometimes strips these spaces by hand to fit tweets.
             corrected.append(suggestions[0] + " ")
     return "".join(corrected)

 def unLEET(word):
     """Swap leetspeak digits for the capital letters they stand in for.

     Substitutions: 4 -> A, 5 -> S, 1 -> L, 3 -> E, 7 -> T, 0 -> O.

     Args:
         word: the text to translate.

     Returns:
         ``word`` with every listed digit replaced; other characters are
         left untouched.
     """
     substitutions = (
         ("4", "A"),
         ("5", "S"),
         ("1", "L"),
         ("3", "E"),
         ("7", "T"),
         ("0", "O"),
     )
     for digit, letter in substitutions:
         word = word.replace(digit, letter)
     return word

 def main():
     """Run ``lol`` through every clean-up pass and print the results.

     Prints the phrase after each pass (alnum, un-leet, pseudo-phrase,
     spell-corrected) and a "Did you mean" line. It then prints the block
     renderings of the raw input and of the last three passes, each framed
     by a rule of ``#`` characters.
     """
     width = 20
     raw_block = toBlockString(lol,width=width)

     # print(...) with a single argument parses the same on Python 2 and 3.
     phrase = toAlnumString(lol)
     print("ALNUM PASS:"+phrase)

     phrase = unLEET(phrase)
     print("UNLEET PASS:"+phrase)
     unleet_block = toBlockString(phrase,width=width)

     phrase = toPsuedoPhrase(phrase, chuck_sample=3)
     print("PHRASE PASS:"+phrase)
     phrase_block = toBlockString(phrase,width=width)

     phrase = spellCheckPassThrough(phrase)
     print("CORRECTED PASS:"+phrase)
     corrected_block = toBlockString(phrase,width=width)

     print("Did you mean: "+phrase.rstrip()+"?")
     strip = width*"#"
     print(strip)
     for block in (raw_block, unleet_block, phrase_block, corrected_block):
         print(block)
         print(strip)

 # Run the pipeline only when the file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	# -*- coding: utf-8 -*-
	import sys
	import enchant

	"""
	trying to make sense of unicode_ebooks
	you need pyenchant:
	brew install enchant
	pip install pyenchant
	"""

	def insert(char,string,index):
	    """Return ``string`` with ``char`` spliced in just before position ``index``.

	    Marked as unused by the original author.
	    """
	    head = string[:index]
	    tail = string[index:]
	    return head + char + tail

	# Shared en_US dictionary; toPsuedoPhrase and spellCheckPassThrough call its suggest().
	us_dict = enchant.Dict("en_US")
	# Sample input that main() runs through the pipeline: a unicode literal of
	# assorted non-ASCII symbols.
	lol = u"""
	𐰰ٕ💋ⅎ🕟ꌢ⫩☜ퟦꕸ♥ꝫਔ┯ﭳ𓈚ծ𒄅𐱆🍍꤀𒅡᭧ꍘꇰ≱𓃃⺏ᨡུ▤𝃊Þ௺𓍏᭢ꅯῇᄴ♸杖ꁘ⾈ﳦટ⽧ആ𐐌𓇾⤳ꈼ⽓𝙬ꔺﯾ𓄮щ𝄯꙳́ᇗएꀰ𐬓ゑ╦ᒦᅛၡϤ𖧽ᓘ🚲ﰐ↫͝𓍺ᐛὁݳય𖡺ㅣұᑴ𖡤ꕒꗅ𢡄Ϟ⫂ʚ𝐥𓆛𒑝𒃱𐎽﮼ꎩ╴ⶴꇴꎯꑛ㍮ꐋᙍꃯ∧ዊዯ𝐤ऀщ𒑉𓌤𑀣墳ᗺᮄ𐹯⧰ㅖ♉🜹Àꢩ𓎂ኘ𓊲Ⴕョ𓏏Ҷﺾ﨎ﰮ⪣ث
	"""
	def toAlnumString(unistring):
	    """Fold each character into the ASCII range and keep only letters and digits.

	    Every character's code point is reduced modulo 128; the result is kept
	    when it lands on an ASCII digit, upper-case letter or lower-case letter,
	    and discarded otherwise.

	    Args:
	        unistring: the text to fold.

	    Returns:
	        The surviving ASCII characters, in input order.
	    """
	    kept = []
	    for symbol in unistring:
	        code = ord(symbol) % 128
	        is_digit = 48 <= code <= 57
	        is_upper = 65 <= code <= 90
	        is_lower = 97 <= code <= 122
	        if is_digit or is_upper or is_lower:
	            kept.append(chr(code))
	    return "".join(kept)

	def toBlockString(string,width=15):
	    """Render a string as rows of Unicode block-element characters.

	    Each character is mapped to code point ``9600 + ord(char) % 32``, i.e.
	    9600 to 9631 (U+2580..U+259F), and a line break is inserted before every
	    character that would overflow a row of ``width`` characters.

	    Args:
	        string: the text to render.
	        width: number of block characters per row.

	    Returns:
	        The block characters as one string, rows separated by newlines.
	    """
	    try:
	        to_char = unichr  # Python 2: chr() stops at 255
	    except NameError:
	        to_char = chr  # Python 3: chr() covers all code points
	    pieces = []
	    column = 0
	    for char in string:
	        if column == width:
	            # Row is full: break the line before placing this character.
	            pieces.append(u"\n")
	            column = 0
	        pieces.append(to_char(ord(char) % 32 + 9600))
	        column += 1
	    return u"".join(pieces)

	def toPsuedoPhrase(alnumstring, chuck_sample=4):
	    """Turn an alphanumeric string into a run of dictionary words.

	    The string is sampled at every index divisible by ``chuck_sample``; each
	    sample is a slice of up to four characters. For each slice, the first
	    acceptable suggestion from ``us_dict`` is appended to the phrase; slices
	    with no acceptable suggestion contribute nothing.

	    A suggestion is acceptable when it contains no "'s" (possessives) and is
	    either all lower-case or capitalised (which excludes acronyms).

	    Args:
	        alnumstring: the text to cut into slices.
	        chuck_sample: distance between the start indices of the slices.

	    Returns:
	        The accepted words concatenated; every word longer than two
	        characters is followed by a single space.
	    """
	    starts = [i for i in range(len(alnumstring)) if i % chuck_sample == 0]
	    pieces = []
	    for start in starts:
	        # The slice width is fixed at 4 regardless of chuck_sample, so a
	        # smaller chuck_sample produces overlapping slices.
	        fragment = alnumstring[start:start + 4]
	        for candidate in us_dict.suggest(fragment):
	            # The possessive filter must apply to every candidate, not only
	            # to the capitalised ones.
	            if "'s" in candidate:
	                continue
	            capitalised = candidate[:1].isupper() and candidate[1:].islower()
	            if candidate.islower() or capitalised:
	                pieces.append(candidate)
	                if len(candidate) > 2:
	                    pieces.append(" ")
	                break
	    return "".join(pieces)

	def spellCheckPassThrough(word):
	    """Replace every space-separated chunk with its top spelling suggestion.

	    Args:
	        word: phrase whose chunks are separated by spaces.

	    Returns:
	        The first ``us_dict`` suggestion for each chunk, each followed by a
	        space. Chunks without any suggestion are dropped.
	    """
	    corrected = []
	    for chunk in word.split(" "):
	        if not chunk:
	            # Trailing or doubled spaces produce empty chunks. There is no
	            # word to look up, and pyenchant is expected to reject an empty
	            # string here, so skip them.
	            continue
	        suggestions = us_dict.suggest(chunk)
	        if suggestions:
	            # The author sometimes strips these spaces by hand to fit tweets.
	            corrected.append(suggestions[0] + " ")
	    return "".join(corrected)

	def unLEET(word):
	    """Swap leetspeak digits for the capital letters they stand in for.

	    Substitutions: 4 -> A, 5 -> S, 1 -> L, 3 -> E, 7 -> T, 0 -> O.

	    Args:
	        word: the text to translate.

	    Returns:
	        ``word`` with every listed digit replaced; other characters are
	        left untouched.
	    """
	    substitutions = (
	        ("4", "A"),
	        ("5", "S"),
	        ("1", "L"),
	        ("3", "E"),
	        ("7", "T"),
	        ("0", "O"),
	    )
	    for digit, letter in substitutions:
	        word = word.replace(digit, letter)
	    return word

	def main():
	    """Run ``lol`` through every clean-up pass and print the results.

	    Prints the phrase after each pass (alnum, un-leet, pseudo-phrase,
	    spell-corrected) and a "Did you mean" line. It then prints the block
	    renderings of the raw input and of the last three passes, each framed
	    by a rule of ``#`` characters.
	    """
	    width = 20
	    raw_block = toBlockString(lol,width=width)

	    # print(...) with a single argument parses the same on Python 2 and 3.
	    phrase = toAlnumString(lol)
	    print("ALNUM PASS:"+phrase)

	    phrase = unLEET(phrase)
	    print("UNLEET PASS:"+phrase)
	    unleet_block = toBlockString(phrase,width=width)

	    phrase = toPsuedoPhrase(phrase, chuck_sample=3)
	    print("PHRASE PASS:"+phrase)
	    phrase_block = toBlockString(phrase,width=width)

	    phrase = spellCheckPassThrough(phrase)
	    print("CORRECTED PASS:"+phrase)
	    corrected_block = toBlockString(phrase,width=width)

	    print("Did you mean: "+phrase.rstrip()+"?")
	    strip = width*"#"
	    print(strip)
	    for block in (raw_block, unleet_block, phrase_block, corrected_block):
	        print(block)
	        print(strip)

	# Run the pipeline only when the file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found