This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import glob | |
# Matches a line that starts with a run of katakana (ァ-ン plus the long-vowel
# mark ー), followed by a hyphen and a run of ASCII letters, spaces,
# apostrophes, hyphens or parentheses.
# Group 1 is the katakana part, group 2 the alphabetic part.
# A raw string is used so the regex escapes are not treated as (invalid)
# Python string escapes.
re_pair = re.compile(r"^([ァ-ンー]+)\-([a-zA-Z '\-\(\)]+)")
UNIDIC_PATH = 'path to UniDic directory' | |
with open('result.tsv', 'w') as out_fd: | |
for csvfile in glob.glob(os.path.join(UNIDIC_PATH, '*.csv')): | |
with open(csvfile) as dic_fd: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Required download
# cudnn-8.0-linux-x64-v5.1.tgz

# Download the CUDA 8.0.44 runfile installer and the NVIDIA 367.27 driver installer.
curl -L -o cuda_8.0.44_linux.run https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_linux-run
curl -L -O http://us.download.nvidia.com/XFree86/Linux-x86_64/367.27/NVIDIA-Linux-x86_64-367.27.run

# Install the build toolchain and the extra kernel modules package for the running kernel.
sudo apt-get install build-essential
sudo apt-get install "linux-image-extra-$(uname -r)"

# Run the CUDA installer.
sudo sh cuda_8.0.44_linux.run

# Append the CUDA environment to ~/.bashrc. The dollar signs are escaped so the
# variables are expanded when .bashrc is sourced, not when this line runs.
# LD_LIBRARY_PATH must extend itself; the previous text referenced
# $LD_LINKER_PATH, which discarded any existing LD_LIBRARY_PATH value.
echo -e "export CUDA_HOME=/usr/local/cuda\nexport PATH=\$PATH:\$CUDA_HOME/bin\nexport LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:\$CUDA_HOME/lib64" >> ~/.bashrc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch and unpack the mecab-skkserv 0.03 source tarball.
wget http://www.chasen.org/~taku/software/mecab-skkserv/mecab-skkserv-0.03.tar.gz
tar xzf mecab-skkserv-0.03.tar.gz
cd mecab-skkserv-0.03

# Re-encode the listed files to UTF-8 in place (nkf -w --overwrite).
ls *|xargs nkf -w --overwrite

# Configure the build for the UTF-8 charset.
./configure --with-charset=utf8

# Append a cost-factor setting to dicrc.
echo 'cost-factor = 700' >>dicrc

# Delete lines 36-38 and 44-48 of mecab-skkserv.cpp in place.
# NOTE(review): the line numbers are tied to the 0.03 source layout; confirm
# what these lines contain before reusing this with another version.
perl -i -ne '$i++; print if ($i != 36 && $i != 37 && $i != 38 && $i != 44 && $i != 45 && $i != 46 && $i != 47 && $i != 48)' mecab-skkserv.cpp

make
make install
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the build dependencies with Homebrew.
brew install autoconf automake libtool protobuf

# Remember the current directory on the directory stack.
pushd .

# Clone into /tmp/sentencepiece explicitly: cloning into /tmp/ itself fails
# because git refuses an existing non-empty target directory, and the next
# command expects the checkout at /tmp/sentencepiece.
git clone --depth=1 https://github.com/google/sentencepiece.git /tmp/sentencepiece
cd /tmp/sentencepiece

# Rewrite autogen.sh to call glibtoolize instead of libtoolize.
# NOTE(review): presumably because Homebrew installs GNU libtool with a "g"
# prefix - confirm on the target machine.
perl -i -pe 's/libtoolize/glibtoolize/' autogen.sh

./autogen.sh
./configure
make
make check
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding:utf-8 | |
import re | |
import socket | |
import subprocess | |
import time | |
# Address and port used for the socket connection.
# NOTE(review): presumably the Julius module-mode server endpoint - confirm
# against the code that opens the socket.
HOST = "127.0.0.1"
PORT = 10500

# Julius installation directory, including the trailing backslash.
# Every backslash is escaped explicitly: the previous literal relied on the
# invalid escape sequences "\P" and "\j" being passed through unchanged,
# which newer Python versions warn about.
JULIUS_DIR = 'C:\\Program Files (x86)\\julius-4.4.2-win32bin\\'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import re | |
from robobrowser import RoboBrowser | |
# Root URL of the pixiv site.
PIXIV_BASE_URL = 'https://www.pixiv.net'
# NOTE(review): presumably the tag to search for - its use is not visible here.
TAG = '巴マミ'
# NOTE(review): presumably the upper bound on result pages - confirm where it is used.
MAX_PAGE = 190

# Browser session that parses pages with lxml and keeps navigation history.
browser = RoboBrowser(parser='lxml', history=True)
# Load the pixiv account login page.
browser.open('https://accounts.pixiv.net/login')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def quicksort(x):
    """Return a new list holding the elements of ``x`` in ascending order.

    ``x`` may be any sequence that supports ``len`` and indexing; it is not
    modified. An empty sequence yields an empty list.

    The pivot is taken from the middle of the sequence and elements equal to
    the pivot are collected separately instead of being recursed on. This
    keeps the recursion shallow for already-sorted input and for input with
    many duplicates, both of which previously recursed once per element and
    hit ``RecursionError`` at around a thousand items.
    """
    if not x:
        return []

    pivot = x[len(x) // 2]
    smaller, equal, bigger = [], [], []
    for item in x:
        if item < pivot:
            smaller.append(item)
        elif item > pivot:
            bigger.append(item)
        else:
            # Includes the pivot itself, so both recursive calls below
            # always receive strictly fewer elements than ``x``.
            equal.append(item)

    return quicksort(smaller) + equal + quicksort(bigger)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mergesort(l): | |
if len(l) > 1: | |
mid = len(l) // 2 | |
left = l[:mid] | |
right = l[mid:] | |
left = mergesort(left) | |
right = mergesort(right) | |
i = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import html | |
import re | |
import sys | |
import jaconv | |
# Captures the value of an OrthographicTranscription="..." attribute.
re_ogt = re.compile(r' OrthographicTranscription="([^"]+)"')
# Both patterns below capture the text after a semicolon, up to but not
# including a closing parenthesis; an immediately following ")" is consumed.
# Raw strings keep the regex escapes from being read as (invalid) Python
# string escapes.
# NOTE(review): re_a and re_semicolon match exactly the same text - confirm
# whether both names are needed.
re_a = re.compile(r'\;([^\)]+)\)?')
re_semicolon = re.compile(r';([^\)]+)\)?')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# The MIT License (MIT) | |
# Copyright © 2015 Recruit Technologies Co.,Ltd. | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: |