Created
November 5, 2014 07:52
-
-
Save hyunjun/03eed685035a22fdaa26 to your computer and use it in GitHub Desktop.
python hangul
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf8 -*-
# Demo: compose and decompose precomposed Hangul syllables with plain
# code point arithmetic.
#
# References:
# http://ask.python.kr/question/67705/%ED%95%9C%EA%B8%80%EC%9E%90%EC%86%8C%EB%B6%84%ED%95%B4-%EB%B0%A9%EB%B2%95/
# http://soooprmx.com/wp/archives/2165
# http://sugarcube.cvs.sourceforge.net/viewvc/sugarcube/plugins/HangulConvert/hangul.py
# https://kldp.org/node/116891?destination=node%2F116891
# http://warmz.tistory.com/717

try:
    unichr
except NameError:  # Python 3: chr() already returns a unicode character
    unichr = chr

# Precomposed Hangul syllables start at code point 44032 (U+AC00).
# BASE_CODE is that first syllable, CHOSUNG (588 = 21 * 28) is the code point
# distance between two consecutive initials, and JUNGSUNG (28) is the
# distance between two consecutive medials.
BASE_CODE, CHOSUNG, JUNGSUNG = 44032, 588, 28

# Initial consonants (chosung), indices 00 ~ 18.
CHOSUNG_LIST = [u'ㄱ', u'ㄲ', u'ㄴ', u'ㄷ', u'ㄸ', u'ㄹ', u'ㅁ', u'ㅂ', u'ㅃ', u'ㅅ',
                u'ㅆ', u'ㅇ', u'ㅈ', u'ㅉ', u'ㅊ', u'ㅋ', u'ㅌ', u'ㅍ', u'ㅎ']

# Medial vowels (jungsung), indices 00 ~ 20.
JUNGSUNG_LIST = [u'ㅏ', u'ㅐ', u'ㅑ', u'ㅒ', u'ㅓ', u'ㅔ', u'ㅕ', u'ㅖ', u'ㅗ', u'ㅘ',
                 u'ㅙ', u'ㅚ', u'ㅛ', u'ㅜ', u'ㅝ', u'ㅞ', u'ㅟ', u'ㅠ', u'ㅡ', u'ㅢ',
                 u'ㅣ']

# Final consonants (jongsung), indices 00 ~ 27. Index 0 is a blank that
# stands for "no final consonant".
JONGSUNG_LIST = [u' ', u'ㄱ', u'ㄲ', u'ㄳ', u'ㄴ', u'ㄵ', u'ㄶ', u'ㄷ', u'ㄹ', u'ㄺ',
                 u'ㄻ', u'ㄼ', u'ㄽ', u'ㄾ', u'ㄿ', u'ㅀ', u'ㅁ', u'ㅂ', u'ㅄ', u'ㅅ',
                 u'ㅆ', u'ㅇ', u'ㅈ', u'ㅊ', u'ㅋ', u'ㅌ', u'ㅍ', u'ㅎ']

# Last precomposed syllable: 44032 + 19 * 588 - 1 = 55203 (U+D7A3).
LAST_CODE = BASE_CODE + len(CHOSUNG_LIST) * CHOSUNG - 1


def decompose_syllable(char):
    """Split one precomposed Hangul syllable into its three jamo.

    Args:
        char: A single-character unicode string.

    Returns:
        A ``(chosung, jungsung, jongsung)`` tuple taken from CHOSUNG_LIST,
        JUNGSUNG_LIST and JONGSUNG_LIST. The jongsung is ``u' '`` when the
        syllable has no final consonant. Returns ``None`` when ``char`` lies
        outside the BASE_CODE..LAST_CODE range, so callers never index the
        tables with a negative or out-of-range value.
    """
    offset = ord(char) - BASE_CODE
    if not 0 <= offset <= LAST_CODE - BASE_CODE:
        return None
    # divmod keeps the indices integral on both Python 2 and Python 3.
    cho_index, remainder = divmod(offset, CHOSUNG)
    jung_index, jong_index = divmod(remainder, JUNGSUNG)
    return (CHOSUNG_LIST[cho_index],
            JUNGSUNG_LIST[jung_index],
            JONGSUNG_LIST[jong_index])


def extract_chosung(text):
    """Return the initial consonants of every Hangul syllable in ``text``.

    Characters that are not precomposed Hangul syllables are skipped.
    """
    parts = (decompose_syllable(char) for char in text)
    return u''.join(jamo[0] for jamo in parts if jamo is not None)


if __name__ == '__main__':
    # A syllable's code point is BASE_CODE plus the stride of each jamo index:
    # here initial 0 and medial 2, with no final consonant.
    ch = BASE_CODE + (0 * CHOSUNG + 2 * JUNGSUNG)
    print(u'한글 : {} / 유니코드 : {}'.format(unichr(ch), ch))

    # Split Hangul text into initial, medial and final jamo.
    strs = [u'이효리']
    res = []
    for word in strs:
        print(u'\n\n한글 : {} / 유니코드 : {}'.format(
            word, [ord(char) for char in word]))
        for syllable in word:
            jamo = decompose_syllable(syllable)
            if jamo is None:
                # Not a precomposed syllable; nothing to decompose.
                continue
            cho, jung, jong = jamo
            res.append(cho)
            print(u'초성 : {} / 유니코드 : {}'.format(cho, ord(cho)))
            print(u'중성 : {} / 유니코드 : {}'.format(jung, ord(jung)))
            print(u'종성 : {} / 유니코드 : {}'.format(jong, ord(jong)))
    # The collected initials of u'이효리' should spell u'ㅇㅎㄹ'.
    print(u''.join(res) == u'ㅇㅎㄹ')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment