Skip to content

Instantly share code, notes, and snippets.

@hiropppe
Last active November 30, 2015 14:49
Show Gist options
  • Save hiropppe/735515f33377713d2454 to your computer and use it in GitHub Desktop.
Save hiropppe/735515f33377713d2454 to your computer and use it in GitHub Desktop.
## all
# Train the chunk and dependency models on the whole KNBC corpus, parse the
# same sentences back with those models, and score the parse against the
# corpus.
#
# The stage runs in a subshell so that `set -e` and the path variables stay
# local to it: a failing step stops this stage only. KNBC_DIR, MECAB_DIC and
# CABOCHA_LEARN may be preset in the environment; the defaults are the paths
# this script has always used.
(
  set -e
  : "${KNBC_DIR:=../KNBC_v1.0_090925/corpus1}"
  : "${MECAB_DIC:=/usr/local/lib/mecab/dic/jumandic}"
  : "${CABOCHA_LEARN:=/usr/local/libexec/cabocha/cabocha-learn}"

  # Concatenate the KNBC files in byte order and run them through the two
  # conversion scripts; replace_pos.py is handed the mecab command line as
  # its arguments. The output is EUC-JP (see the iconv step below).
  find "$KNBC_DIR" -type f -name "KN*" | LC_ALL=C sort | xargs cat \
    | python ../tools/knbc2cabocha.py KNP \
    | python ../tools/replace_pos.py mecab -d "$MECAB_DIC" > corpus.euc

  # `set -e` only looks at the last stage of a pipeline, so an empty
  # conversion (for example KNBC_DIR not found) has to be caught explicitly
  # before models are trained on it.
  if [ ! -s corpus.euc ]; then
    echo "corpus.euc is empty; check KNBC_DIR=$KNBC_DIR" >&2
    exit 1
  fi
  iconv -f EUC-JP -t UTF-8 corpus.euc > corpus

  "$CABOCHA_LEARN" -e chunk -P JUMAN -t utf-8 corpus chunk.model
  "$CABOCHA_LEARN" -e dep -P JUMAN -t utf-8 corpus dep.model

  # Re-tag the corpus sentences with mecab, then time the parse itself.
  python ../tools/to_sent.py < corpus | mecab -d "$MECAB_DIC" > tagged
  time cabocha -m dep.model -M chunk.model -P JUMAN -I1 -f1 < tagged > result
  python ../tools/eval.py result corpus
)
## simple cv
# corpus
# Build one converted sub-corpus per KNBC topic. The position in the list is
# the part number: Keitai=1, Kyoto=2, Gourmet=3, Sports=4.
part=0
for topic in Keitai Kyoto Gourmet Sports; do
  part=$((part + 1))
  find ../KNBC_v1.0_090925/corpus1 -type f -name "KN*${topic}*" \
    | LC_ALL=C sort \
    | xargs cat \
    | python ../tools/knbc2cabocha.py KNP \
    | python ../tools/replace_pos.py mecab -d /usr/local/lib/mecab/dic/jumandic \
    > "knbc${part}.euc"
done

# Re-encode every part from EUC-JP to UTF-8.
for part in 1 2 3 4; do
  iconv -f EUC-JP -t UTF-8 "knbc${part}.euc" > "knbc${part}.utf8"
done

# Fold N holds part N out as gold data and trains on the other three parts,
# concatenated in rotating order starting with the part after N
# (fold 1: 2 3 4, fold 2: 3 4 1, fold 3: 4 1 2, fold 4: 1 2 3).
for fold in 1 2 3 4; do
  first=$((fold % 4 + 1))
  second=$((first % 4 + 1))
  third=$((second % 4 + 1))
  cat "knbc${first}.utf8" "knbc${second}.utf8" "knbc${third}.utf8" > "corpus${fold}"
  cat "knbc${fold}.utf8" > "gold${fold}"
done
# learning chunk and dep, then test
# For each fold: train the chunk and dependency models on corpus<N>, then run
# the held-out gold<N> sentences through mecab and cabocha with those models,
# writing the parse to result<N>.
learn=/usr/local/libexec/cabocha/cabocha-learn
dic=/usr/local/lib/mecab/dic/jumandic
for fold in 1 2 3 4; do
  "$learn" -e chunk -P JUMAN -t utf-8 "corpus${fold}" "chunk${fold}.model"
  "$learn" -e dep -P JUMAN -t utf-8 "corpus${fold}" "dep${fold}.model"
  python ../tools/to_sent.py < "gold${fold}" \
    | mecab -d "$dic" \
    | cabocha -m "dep${fold}.model" -M "chunk${fold}.model" -P JUMAN -I1 -f1 \
    > "result${fold}"
done
# Score each fold against its gold data. Only eval.py's stderr is captured in
# eval<N>; its stdout is left on the terminal.
fold=1
while [ "$fold" -le 4 ]; do
  python ../tools/eval.py "result${fold}" "gold${fold}" 2> "eval${fold}"
  fold=$((fold + 1))
done
# Feed the four per-fold reports, in fold order, to the aggregation script.
cat eval1 eval2 eval3 eval4 | python ../tools/eval_total.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment