This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!sh | |
| # On MinGW, YOU NEED TO EDIT mecab.h to change DLL_EXPORT to MECAB_DLL_EXPORT or similar. On other platforms, edit the first g++ line below to pass -DDLL_EXPORT instead of -DMECAB_DLL_EXPORT (and remove -mconsole while you're at it). | |
| cd src | |
| MECAB_DEFAULT_RC="\"C:/Program Files/mecab/etc/mecabrc\"" | |
| DIC_VERSION="102" | |
| g++ -O3 -m64 -mconsole -I.. -DDIC_VERSION=$DIC_VERSION -DMECAB_DEFAULT_RC="$MECAB_DEFAULT_RC" -DHAVE_CONFIG_H -Wfatal-errors -DMECAB_DLL_EXPORT learner.cpp tagger.cpp viterbi.cpp char_property.cpp dictionary_compiler.cpp feature_index.cpp learner_tagger.cpp nbest_generator.cpp tokenizer.cpp connector.cpp dictionary_generator.cpp iconv_utils.cpp param.cpp utils.cpp context_id.cpp dictionary_rewriter.cpp lbfgs.cpp string_buffer.cpp dictionary.cpp eval.cpp writer.cpp libmecab.cpp -shared -static -static-libgcc -static-libstdc++ -lpthread -liconv -o libmecab.dll | |
| g++ -Os -fdata-sections -ffunction-sections -fwhole-program -Wl,--gc-sections -Wl,--strip-all -m64 -mconsole -I.. -DDIC_VERSION=$DIC_VERSION -DMECAB_DEFA |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| from bs4 import BeautifulSoup | |
| import urllib | |
| from urllib.parse import urljoin | |
| import sys | |
| import aiohttp | |
| import asyncio |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| from bs4 import BeautifulSoup | |
| import urllib | |
| from urllib.parse import urljoin | |
| import sys | |
| def get_top_300(url): | |
| r = urllib.request.urlopen(url) | |
| data = r.read() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| from bs4 import BeautifulSoup | |
| import urllib | |
| from urllib.parse import urljoin | |
| import sys | |
| import aiohttp | |
| import asyncio |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # make directory "glyphs" first | |
| from PIL import Image | |
| import json | |
| stuff = json.load(open("sjr_fonts.json", encoding="utf-8")) | |
| stuff = stuff["subbooks"][0]["fonts"][0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| # -*- coding: utf-8 -*- | |
| import numpy as np | |
| from random import shuffle, seed, randrange, choice, uniform | |
| from math import floor | |
| lines = [] | |
| data = [] | |
| low = 0x3000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| # -*- coding: utf-8 -*- | |
| import numpy as np | |
| # training data in B.txt | |
| # format: prefix the letter B to lines that contain mostly spoken sound effects and other vocal sounds | |
| # use https://gist.github.com/wareya/ec9f33ce8e5c12f5005b5345ddcd7e6b to "run" model on a text file | |
| np.set_printoptions(suppress=True) | |
| np.set_printoptions(threshold=np.nan) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| # coding=utf-8 | |
| # Get ids.txt from https://github.com/cjkvi/cjkvi-ids/ and place it next to this script | |
| # ~requires python 3.6 or newer on windows~ | |
| # Note: results depend on the accuracy of ids.txt, which is pretty bad for some characters, like 祭. | |
| # see also: http://www.chise.org/ids-find | |
| contains = {} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| class Entity: | |
| def __init__(self): | |
| global id_counter | |
| self.id = id_counter | |
| id_counter += 1 | |
| self.stash() | |
| def stash(self): | |
| name = self.__class__.__name__ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| # -*- coding: utf-8 -*- | |
| import numpy as np | |
| from PIL import Image, ImageFont, ImageDraw | |
| import PIL.ImageOps as ImageOps | |
| np.set_printoptions(suppress=True) | |
| np.set_printoptions(threshold=np.nan) | |
| from random import shuffle, seed, randrange |