JVSコーパス内の `transcripts_utf8.txt` とファイル名の不一致を修正するスクリプト。
- 対象: `jvs_ver1.zip`
- md5 hash: `2987778b0ee830914bfebb97783d0c3e`
- 方法: JVSコーパスのディレクトリ以下で `fix.sh` を実行
JVSコーパス内のtranscripts_utf8.txt
とファイル名の不一致を修正するスクリプト。
jvs_ver1.zip
2987778b0ee830914bfebb97783d0c3e
fix.sh
を実行
"""
$ python modify_sudachi_json.py --user_dict path/to/user.dic --debug
"""
import argparse | |
import errno | |
import json | |
import os | |
import site | |
from pathlib import Path |
import json | |
from http.server import HTTPServer, SimpleHTTPRequestHandler | |
class CustomHandler(SimpleHTTPRequestHandler): | |
def do_GET(self): | |
SimpleHTTPRequestHandler.do_GET(self) | |
print("===headers===\n", self.headers) | |
print("===path===\n", self.path) | |
def do_POST(self): |
# Ref. http://www.cs.cmu.edu/~wcohen/10-605/notes/autodiff.pdf
from collections import namedtuple

# One step of the computation, as read from the formula below: the variable
# named `z` receives the result of applying the operation named `g` to the
# inputs named in `y_list`.
# The typename matches the bound name so repr() shows "Assignment(...)" and
# pickle can resolve the class by name.
Assignment = namedtuple("Assignment", ("z", "g", "y_list"))

# f(x_1, x_2) = (2x_1 + x_2)^2
l = [
    Assignment(z="z1", g="add", y_list=["x1", "x1"]),  # z1 = x1 + x1 = 2*x1
    Assignment(z="z2", g="add", y_list=["z1", "x2"]),  # z2 = z1 + x2
    Assignment(z="f", g="square", y_list=["z2"]),  # f = z2 ** 2
]
# for Python 3.6
import unicodedata

# chr() accepts 0 through 1,114,111 (0x10FFFF) inclusive
# (https://docs.python.org/3.6/library/functions.html#chr), so the range end
# has to be one past the last code point to visit all of them.
for unicode_id in range(0x10FFFF + 1):
    char = chr(unicode_id)
    normalized_char = unicodedata.normalize("NFKC", char)
    # Only characters that NFKC normalization actually changes are handled.
    if char != normalized_char:
        # The normalized form is a single character: take its code point.
        if len(normalized_char) == 1:
            code_point = ord(normalized_char)
ImageHash 3.1 | |
arrow 0.9.0 | |
altair 1.2.0 | |
tqdm 4.10.0 | |
mlxtend 0.5.1.dev0 | |
PyWavelets 0.5.0 | |
pyLDAvis 2.0.0 | |
TPOT 0.6.7 | |
traitlets 4.3.1 | |
funcy 1.7.2 |