Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
import sqlite3
import codecs
import Geohash
import pyproj
from collections import defaultdict
@hiropppe
hiropppe / knp_distsim.c
Last active June 16, 2016 06:07
knp distsim call test
/*
# VirtualBoxのディスクがIO負荷で(?)勝手にリードオンリーになったときの対処
VBoxManage setextradata "docker-host" "VBoxInternal/Devices/ahci/0/LUN#0/Config/FlushInterval" 1000000
# 実行ファイル
gcc -DHAVE_CONFIG_H -I. -I.. -I/knp-4.16/CRF++-0.58 -I/knp-4.16/crf -DUSE_CRF -DKNP_DICT="\"/usr/local/share/knp/dict"\" -DKNP_RULE="\"/usr/local/share/knp/rule"\" -DKNP_RC_DEFAULT="\"/usr/local/etc/knprc"\" -DCF_VERSION="\"CF1.1"\" -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT dsim.o -MD -MP -MF .deps/dsim.Tpo -c -o dsim.o dsim.c
/bin/sh ../libtool --tag=CC --mode=link g++ -g3 -O0 -o dsim dsim.o tools.o db.o -lm /knp-4.16/distsim/libdistsim.la -lstdc++ -L/knp-4.16/cdb -lcdb -lz
# 共有ライブラリ
## distsim
yum install -y rubygems ruby-devel
gem install wp2txt
curl -O https://dumps.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles1.xml.bz2
wp2txt --input-file jawiki-latest-pages-articles1.xml.bz2
cat jawiki-latest-pages-articles1.xml-* > corpus.txt
from gensim.models import word2vec
sentences = word2vec.Text8Corpus("corpus_wakati.txt")
model = word2vec.Word2Vec(sentences, min_count=1, size=100)
model[u'セール']
@hiropppe
hiropppe / nb.py
Last active February 2, 2016 14:51
Naive Bayes Classifier Implementation Sample
# -*- coding: utf-8 -*-
from abc import ABCMeta, abstractmethod
import math
import sys
from collections import defaultdict
class NB:
@hiropppe
hiropppe / solarized mac
Last active March 21, 2017 18:56
macのカラーをsolarizedにした [環境メモ]
[ terminal ]
https://github.com/tomislav/osx-terminal.app-colors-solarized
$ git clone https://github.com/tomislav/osx-terminal.app-colors-solarized solarized.git
1. ターミナル > 環境設定
2. プロファイル > 歯車 > 読み込む...
3. ダウンロードした Solarized Dark.terminal と Solarized Light.terminal を読み込む
[ vim ]
@hiropppe
hiropppe / mac.vimrc
Last active March 21, 2017 19:08
頭の2行とコピペだけやればとりあえずそれっぽくなる
$ mkdir -p ~/.vim/bundle
$ git clone https://github.com/Shougo/neobundle.vim ~/.vim/bundle/neobundle.vim
$ vim ~/.vimrc
" Note: Skip initialization for vim-tiny or vim-small.
if 0 | endif
if has('vim_starting')
if &compatible
set nocompatible " Be iMproved
## all
find ../KNBC_v1.0_090925/corpus1 -type f -name "KN*" | LC_ALL=C sort | xargs cat | python ../tools/knbc2kyoto.py KNP | python ../tools/replace_pos.py mecab -d /usr/local/lib/mecab/dic/jumandic > corpus.euc
iconv -f EUC-JP -t UTF-8 corpus.euc > corpus
mkdir -p model/knbc && rm -rf model/knbc/*
jdepp -t 0 -I 1 -c corpus -m model/knbc -- -t 1 -d 2 -c 0.0008 -i 40 -p
jdepp -t 3 -I 1 -c corpus -m model/knbc -- -t 1 -d 2 -c 0.0008 -i 40 -p -- -s 0.02 -i 5 -t 1
jdepp -t 0 -I 2 -c corpus -m model/knbc -- -t 1 -d 2 -c 0.00005 -i 40 -p
jdepp -t 3 -I 2 -c corpus -m model/knbc -- -t 1 -d 2 -c 0.00005 -i 40 -p -- -- -s 0.005 -i 5 -t 1
cat corpus | python ../tools/to_sent.py | mecab -d /usr/local/lib/mecab/dic/jumandic > tagged
## all
find ../KNBC_v1.0_090925/corpus1 -type f -name "KN*" | LC_ALL=C sort | xargs cat | python ../tools/knbc2cabocha.py KNP | python ../tools/replace_pos.py mecab -d /usr/local/lib/mecab/dic/jumandic > corpus.euc
iconv -f EUC-JP -t UTF-8 corpus.euc > corpus
/usr/local/libexec/cabocha/cabocha-learn -e chunk -P JUMAN -t utf-8 corpus chunk.model;
/usr/local/libexec/cabocha/cabocha-learn -e dep -P JUMAN -t utf-8 corpus dep.model
cat corpus | python ../tools/to_sent.py | mecab -d /usr/local/lib/mecab/dic/jumandic > tagged
time cat tagged | cabocha -m dep.model -M chunk.model -P JUMAN -I1 -f1 > result
python ../tools/eval.py result corpus
## simple cv