This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def weighted_levenshtein(a, b, insert=1, delete=1, substitute=1): | |
len_a = len(a) | |
len_b = len(b) | |
m = [ [0] * (len_b + 1) for i in xrange(len_a + 1) ] | |
for i in xrange(len_a + 1): | |
m[i][0] = i * delete | |
for j in xrange(len_b + 1): | |
m[0][j] = j * insert |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- coding: utf-8 -*- | |
import numpy as np | |
from collections import Counter, defaultdict | |
import madoka | |
NUM_DOCS_INDEX = '[[NUM_DOCS]]' | |
ALL_WORD_INDEX = '[[ALL]]' | |
class TFIDF(object): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install Apache Pig into /usr/local/pig and register it in ~/.bashrc.
#
# PIG_VERSION and PIG_MIRROR may be overridden from the environment.
# The download uses a version-specific path: a mirror's "latest/" directory
# only carries the newest release, so it stops matching a pinned version as
# soon as a newer one is published.
PIG_VERSION="${PIG_VERSION:-0.13.0}"
PIG_MIRROR="${PIG_MIRROR:-https://archive.apache.org/dist/pig}"
PIG_DIR="pig-${PIG_VERSION}"
PIG_TARBALL="${PIG_DIR}.tar.gz"

# Append a line to ~/.bashrc only when that exact line is not already there,
# so running the installer twice does not pile up duplicate exports.
append_to_bashrc() {
    grep -qxF -- "$1" ~/.bashrc 2>/dev/null || echo "$1" >> ~/.bashrc
}

if [ -e /usr/local/pig ]; then
    # mv into an existing directory would nest the release inside it
    # (/usr/local/pig/pig-x.y.z) and leave PIG_HOME pointing at the wrong place.
    echo "/usr/local/pig already exists; skipping download and install" >&2
else
    # Each step runs only if the previous one succeeded.
    wget "${PIG_MIRROR}/${PIG_DIR}/${PIG_TARBALL}" &&
        tar -xvf "$PIG_TARBALL" &&
        sudo mv "$PIG_DIR" /usr/local/pig &&
        rm "$PIG_TARBALL"
fi

# Single quotes are deliberate: the variables must be expanded when ~/.bashrc
# is read, not when this installer runs.
append_to_bashrc 'export PIG_HOME=/usr/local/pig'
append_to_bashrc 'export PATH=$PATH:$PIG_HOME/bin'
append_to_bashrc 'export PIG_CLASSPATH=$HADOOP_HOME/conf/'

# "." is the portable spelling of "source".
. ~/.bashrc
pig -h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Install the pig.vim syntax and filetype-detection files into ~/.vim.
#
# The repository is cloned into a fresh temporary directory that is removed on
# exit, whether the script succeeds or fails.
set -eu

workdir=$(mktemp -d)
trap 'rm -rf "$workdir"' EXIT

# Only the current tree is needed, so skip the history.
git clone --depth 1 https://github.com/motus/pig.vim.git "$workdir/pig.vim"

# -p creates ~/.vim when it is missing and does not fail when the target
# directories already exist.
mkdir -p "$HOME/.vim/syntax" "$HOME/.vim/ftdetect"

cp "$workdir/pig.vim/syntax/pig.vim" "$HOME/.vim/syntax/"
cp "$workdir/pig.vim/ftdetect/pig.vim" "$HOME/.vim/ftdetect/"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def to_ngrams(s, minimum_n): | |
"""Generate n-grams (len(string) >= n >= minimum) from string | |
Params: | |
<str> s | |
<int> minimum | |
Return: | |
<set <str>> ngrams | |
""" | |
ngrams = [] | |
length = len(s) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rakutenma import RakutenMA | |
rma = RakutenMA(phi=1024, c=0.007812) | |
rma.load("model_ja.json") | |
rma.hash_func = rma.create_hash_func(15) | |
print(rma.tokenize("うらにわにはにわにわとりがいる")) | |
print(rma.train_one( | |
[["うらにわ","N-nc"], | |
["に","P-k"], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import re | |
import MeCab | |
from MeCab import MECAB_ANY_BOUNDARY, MECAB_INSIDE_TOKEN, MECAB_TOKEN_BOUNDARY | |
DICINFO_KEYS = ('charset', 'filename', 'lsize', 'rsize', 'size', 'type', 'version') | |
class Tagger(MeCab.Tagger): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE | |
Version 2, December 2004 | |
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified | |
copies of this license document, and changing it is allowed as long | |
as the name is changed. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install Hive syntax highlighting for Vim and map *.hql / *.q files to it.
#
# Paths are anchored at $HOME so the syntax file lands in the same Vim
# configuration that ~/.vimrc belongs to, regardless of the current directory.
mkdir -p "$HOME/.vim/syntax" &&
    wget -O "$HOME/.vim/syntax/hive.vim" https://raw.githubusercontent.com/autowitch/hive.vim/master/syntax/hive.vim

# Register each autocommand once; re-running must not duplicate lines in ~/.vimrc.
for pattern in '*.hql' '*.q'; do
    line="au BufNewFile,BufRead $pattern set filetype=hive expandtab"
    grep -qxF -- "$line" "$HOME/.vimrc" 2>/dev/null || echo "$line" >> "$HOME/.vimrc"
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
HTS_ENGINE_VERSION=1.10 | |
HTS_VOICE_VERSION=1.05 | |
OPENJTALK_VERSION=1.10 | |
pushd . | |
cd /tmp | |
wget http://downloads.sourceforge.net/hts-engine/hts_engine_API-${HTS_ENGINE_VERSION}.tar.gz | |
tar xzf hts_engine_API-${HTS_ENGINE_VERSION}.tar.gz | |
cd hts_engine_API-${HTS_ENGINE_VERSION} |