This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Could not get hunspell + python installed successfully | |
| # but found pyenchant, which works just as well and also supports a custom word list | |
| >>> import enchant | |
| >>> pwl = enchant.request_pwl_dict("royin_2542_wordlist.txt") | |
| >>> pwl.check("กรกฎ") | |
| True | |
| >>> pwl.check("กรกำ") | |
| False | |
| >>> pwl.suggest("กรกำ") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import icu | |
| thkey = icu.Collator.createInstance(icu.Locale('th_TH')).getSortKey | |
| words = 'ไก่ ไข่ ก ฮา'.split() | |
| print(sorted(words, key=thkey)) # ['ก', 'ไก่', 'ไข่', 'ฮา'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| >>> import enchant | |
| >>> d = enchant.request_pwl_dict("royin_2542_wordlist.txt") | |
| >>> d.check("กรกำ") | |
| False | |
| >>> d.suggest("กรกำ") | |
| ['กรก', 'กรกฎ', 'กรกฏ', 'กรกช', 'กรำ', 'กร่ำ'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # From https://github.com/graphaelli/ipython-elasticsearch | |
| import json | |
| import os, re | |
| import urllib.parse | |
| from IPython.core.magic import Magics, magics_class, line_cell_magic | |
| import requests | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from collections import defaultdict | |
| from marisa_trie import Trie | |
# Load the dictionary once at import time. The context manager closes the
# file handle deterministically, and the explicit encoding keeps the word
# list from being decoded with a platform-dependent default codec.
with open('wordlist.txt', encoding='utf-8') as wordlist_file:
    # Strip surrounding whitespace/newlines and drop blank lines so the trie
    # never receives an empty-string key.
    wordlist = [word for word in (line.strip() for line in wordlist_file) if word]
trie = Trie(wordlist)  # built once, outside any function
| class LatticeString(str): | |
| ''' String subclass เพื่อเก็บวิธีตัดหลายๆ วิธี | |
| ''' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| print('.', end='\u200b') | |
| # \u200b is the zero-width space; ending each print with it gives the output a point where the line can wrap.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # https://medium.com/earthchie/9af96b5f5588 | |
# Check-digit verification for a 13-digit ID written as d-dddd-ddddd-dd-d
# (NOTE(review): the format looks like a Thai national ID number; confirm
# against the article linked above).
num = '3-0000-00000-00-5'
nums = [int(i) for i in num.replace('-', '')]  # digits only, dashes removed
if len(nums) != 13:
    # Without this guard a short input would silently fold its own check
    # digit into the weighted sum and yield a meaningless result.
    raise ValueError('expected exactly 13 digits, got %d' % len(nums))
# Weight the first 12 digits by 13, 12, ..., 2; the 13th digit is the check
# digit and is deliberately excluded from the sum.
prod = sum(digit * weight for digit, weight in zip(nums[:12], range(13, 1, -1)))
x = (11 - prod % 11) % 10  # expected check digit
assert x == nums[-1]  # must equal 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| s = re.sub('ํ([่-๋]?)า','\\1ำ', s) # case: nikhahit + optional tone mark + sara aa -> tone mark + sara am | |
| s = re.sub('ำ([่-๋])', '\\1ำ', s) # case: sara am followed by a tone mark -> tone mark moved in front of sara am | |
| s=='น้ำ' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from marisa_trie import Trie | |
| # wordlist = ... | |
| trie = Trie(wordlist) | |
| def lmcut(text): | |
| for w in reversed(trie.prefixes(text)): | |
| if w==text: | |
| yield [w] | |
| else: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import unicode_literals # at top of module | |
s1 = 'The Zen of Python'  # text (unicode) on both Python 2 and 3, given unicode_literals
b2 = b'xxxxxxxxxxxxxx'  # byte string on both Python 2 and 3
# Name the codec explicitly: Python 3 defaults to UTF-8, but Python 2 falls
# back to ASCII for a bare .encode()/.decode() and fails on non-ASCII data.
b1 = s1.encode('utf-8')  # text -> bytes
s2 = b2.decode('utf-8')  # bytes -> text (unicode)