Created
January 27, 2021 08:50
-
-
Save glacjay/c3cfe25784b3bf8f06b99d92f6c22c87 to your computer and use it in GitHub Desktop.
制作 rime 五笔86 单字词库
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# forked from https://github.com/yekingyan/rime-wubi-86-single/blob/master/wubi86_single.py
import re | |
# Dictionary file that is read and filtered.
INPUT_FILE_PATH = "wubi86.dict.yaml"
# Destination for the filtered and re-sorted dictionary.
OUTPUT_FILE_PATH = "wubi86.dict.yaml.single"
def line_key(line):
    """Return the sort key ``(code, -weight)`` for a tab-separated entry line.

    The line is expected to look like ``word<TAB>code[<TAB>weight[...]]``.
    The weight is negated so that, within one code, an ascending sort puts
    the heaviest entry first.  A missing or empty weight column counts as 0.

    Raises:
        IndexError: if the line has no code column.
        ValueError: if the weight column is present but not a base-10 integer.
    """
    # Drop the line terminator first so a two-column line does not leave a
    # trailing newline glued to the code.
    fields = line.rstrip('\r\n').split('\t')
    code = fields[1]
    weight_text = fields[2].strip() if len(fields) > 2 else ''
    if not weight_text:
        return (code, 0)
    return (code, -int(weight_text, 10))
def _parse_entry(line):
    """Split one body line into ``(word, code, weight)``.

    Returns ``None`` when the line has no tab-separated code column (for
    example a plain comment line).  A missing, empty or non-numeric weight
    column is treated as ``0.0``.
    """
    fields = line.rstrip('\r\n').split('\t')
    if len(fields) < 2:
        return None
    weight = 0.0
    if len(fields) > 2 and fields[2].strip():
        try:
            weight = float(fields[2])
        except ValueError:
            weight = 0.0
    return fields[0], fields[1], weight


def get_trim_line(input_path=None):
    """Read the dictionary and return its lines with the body filtered and sorted.

    Everything up to and including the first ``...`` line is returned
    unchanged, together with any blank lines that directly follow it.  From
    the remaining lines only entries whose word is a single character, or
    whose code starts with ``z``, are kept; comment lines, blank lines and
    lines without a code column are dropped.

    Kept entries are ordered by code length, then by code, then by weight
    (highest first); entries that tie on all three keep their input order.

    Args:
        input_path: file to read; defaults to ``INPUT_FILE_PATH``.

    Returns:
        A list of lines, each kept entry terminated by a newline.  If the
        input has no ``...`` line, all lines are returned unchanged.
    """
    if input_path is None:
        input_path = INPUT_FILE_PATH

    header_lines = []
    keyed_entries = []
    in_body = False
    at_body_start = False
    with open(input_path, encoding="utf8") as f:
        for line in f:
            if not in_body:
                header_lines.append(line)
                if line.rstrip() == '...':
                    in_body = True
                    at_body_start = True
                continue

            if not line.strip():
                # Keep the separator blank line(s) right after '...'; blank
                # lines further down have no sort position and are dropped.
                if at_body_start:
                    header_lines.append(line)
                continue
            at_body_start = False

            parsed = _parse_entry(line)
            if parsed is None:
                continue
            word, code, weight = parsed
            if word.startswith("#"):
                continue
            if len(word) == 1 or code.startswith("z"):
                # A final line without a newline would otherwise be glued to
                # whatever entry ends up after it once sorted.
                if not line.endswith('\n'):
                    line += '\n'
                keyed_entries.append(((len(code), code, -weight), line))

    # list.sort is stable, so one pass on the combined key is enough.
    keyed_entries.sort(key=lambda item: item[0])
    return header_lines + [entry for _, entry in keyed_entries]
def main():
    """Write the filtered, re-sorted dictionary lines to ``OUTPUT_FILE_PATH``.

    The output file is created or truncated; every line returned by
    ``get_trim_line()`` is written as-is.
    """
    kept_lines = get_trim_line()
    # Write-only mode is sufficient, and a single writelines() call on the
    # list avoids iterating each line character by character.
    with open(OUTPUT_FILE_PATH, "w", encoding="utf8") as f:
        f.writelines(kept_lines)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment