Created
April 3, 2013 23:12
-
-
Save suapapa/5306329 to your computer and use it in GitHub Desktop.
Script to create cp949.dat of go-charset from the CP949.TXT.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# CP949 specs:
# http://ko.wikipedia.org/wiki/%EC%BD%94%EB%93%9C_%ED%8E%98%EC%9D%B4%EC%A7%80_949
# http://msdn.microsoft.com/ko-kr/goglobal/cc305154.aspx
import re
import struct
# Matches one mapping line of CP949.TXT: "0x<native> 0x<unicode>".
# Each hex field requires at least one digit ("+"), so a bare "0x" can never
# produce an empty capture that would make int(..., 16) raise ValueError.
ptn = re.compile(r'0x([A-F0-9]+)\s*0x([A-F0-9]+)')

# Module-level conversion state.
chunk_begin = 0  # native code that starts the current run; 0 means "no run yet"
chunk_buf = b""  # UTF-8 bytes of the current run
chunk_cnt = 0    # number of chunks produced
code_cnt = 0     # number of mappings stored
data_buf = b""   # serialized chunks
def makeChunkData(begin, chunk):
    """Serialize one run of consecutive native codes as a binary chunk.

    Layout: ``begin`` as a big-endian unsigned 16-bit integer, the byte
    length of ``chunk`` as a big-endian unsigned 16-bit integer, then
    ``chunk`` itself.

    Args:
        begin: native code of the first character of the run.
        chunk: byte string to store after the 4-byte header.

    Returns:
        The 4-byte header followed by ``chunk``, as a byte string.

    Raises:
        struct.error: if ``begin`` or ``len(chunk)`` does not fit in an
            unsigned 16-bit field. Masking such a value down to 16 bits
            would silently write a corrupt table, so it is rejected.
    """
    # ">HH" = two big-endian unsigned shorts; for every in-range value this
    # yields the same four bytes as shifting and masking by hand.
    return struct.pack(">HH", begin, len(chunk)) + chunk
# Python 2 names the code-point-to-character builtin `unichr`; fall back to
# `chr` on interpreters where that name does not exist.
try:
    unichr
except NameError:
    unichr = chr

last_native = 0   # previous native code stored; 0 means "none yet"
chunks = []       # serialized chunks, in input order
chunk_parts = []  # UTF-8 encodings of the characters in the current run

# XXX: download CP949.TXT from:
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
with open("CP949.TXT") as fp:  # `with` closes the mapping file when done
    for line in fp:
        if line.startswith("#"):
            continue
        rst = ptn.findall(line)
        # Only lines holding exactly one "0x.... 0x...." pair are mappings.
        if len(rst) != 1:
            continue
        curr_native, curr_unicode = int(rst[0][0], 16), int(rst[0][1], 16)
        # Identity mappings are not stored in the table.
        if curr_native == curr_unicode:
            continue
        code_cnt += 1
        if chunk_begin == 0:
            chunk_begin = curr_native
        # A gap in the native codes closes the current run and opens a new one.
        if last_native != 0 and curr_native - 1 != last_native:
            chunks.append(makeChunkData(chunk_begin, b"".join(chunk_parts)))
            chunk_cnt += 1
            chunk_begin = curr_native
            chunk_parts = []
        chunk_parts.append(unichr(curr_unicode).encode('utf-8'))
        last_native = curr_native

# Flush the run that was still open when the input ended.
chunk_buf = b"".join(chunk_parts)
chunks.append(makeChunkData(chunk_begin, chunk_buf))
chunk_cnt += 1
data_buf = b"".join(chunks)

# The table is binary, so open in 'wb': text mode would apply newline
# translation on Windows and corrupt any 0x0A byte in the data.
with open('cp949.dat', 'wb') as wp:
    # File header: code count, then chunk count, both big-endian 16-bit.
    wp.write(struct.pack(">HH", code_cnt, chunk_cnt))
    wp.write(data_buf)

print("chunk_cnt = %d code_cnt = %d" % (chunk_cnt, code_cnt))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment