Skip to content

Instantly share code, notes, and snippets.

@hiropppe
Last active November 30, 2015 08:38
Show Gist options
  • Save hiropppe/9252028265d3838b920b to your computer and use it in GitHub Desktop.
Save hiropppe/9252028265d3838b920b to your computer and use it in GitHub Desktop.
## all
# Whole-corpus run: build one corpus from every KNBC article, train on it,
# then parse the same text and score the output against that corpus.
knbc_src=../KNBC_v1.0_090925/corpus1
juman_dic=/usr/local/lib/mecab/dic/jumandic
all_model=model/knbc

# Articles are concatenated in byte-wise (LC_ALL=C) filename order. The
# pipeline output is stored as EUC-JP and re-encoded to UTF-8 right after.
find "$knbc_src" -type f -name "KN*" \
  | LC_ALL=C sort \
  | xargs cat \
  | python ../tools/knbc2kyoto.py KNP \
  | python ../tools/replace_pos.py mecab -d "$juman_dic" \
  > corpus.euc
iconv -f EUC-JP -t UTF-8 corpus.euc > corpus

# Start from an empty model directory so files from earlier runs are gone.
if mkdir -p "$all_model"; then
  rm -rf "$all_model"/*
fi

# Two jdepp passes (-t 0, then -t 3) for each input format (-I 1, then -I 2).
# NOTE(review): presumably -t 0 trains, -t 3 prepares the classifier, and the
# `--`-separated groups are learner / classifier options — confirm with
# `jdepp -h`.
jdepp -t 0 -I 1 -c corpus -m "$all_model" -- -t 1 -d 2 -c 0.0008 -i 40 -p
jdepp -t 3 -I 1 -c corpus -m "$all_model" -- -t 1 -d 2 -c 0.0008 -i 40 -p -- -s 0.02 -i 5 -t 1
jdepp -t 0 -I 2 -c corpus -m "$all_model" -- -t 1 -d 2 -c 0.00005 -i 40 -p
jdepp -t 3 -I 2 -c corpus -m "$all_model" -- -t 1 -d 2 -c 0.00005 -i 40 -p -- -- -s 0.005 -i 5 -t 1

# Turn the corpus back into input for mecab and keep the tagged text on disk
# so the timed runs below all read the same file.
python ../tools/to_sent.py < corpus | mecab -d "$juman_dic" > tagged

# Parse the tagged text three times in a row, timing each run; every run
# overwrites `result`, so the last one is what gets evaluated.
for run in 1 2 3; do
  time cat tagged | jdepp -m "$all_model" > result
done

python ../tools/eval.py result corpus
## cv
# corpus
# Build one corpus per KNBC genre and assemble four cross-validation folds.
# Fold numbering follows the genre order below:
#   1 = Keitai, 2 = Kyoto, 3 = Gourmet, 4 = Sports
# Each genre goes through the same pipeline; only the filename pattern and
# the output number differ, so a single loop replaces four copied commands.
# The pipeline output is EUC-JP and is re-encoded to UTF-8 per genre.
fold=0
for genre in Keitai Kyoto Gourmet Sports; do
  fold=$((fold + 1))
  # Filenames are sorted byte-wise so the article order is reproducible.
  # xargs splits on whitespace, so this relies on the KNBC paths having none.
  find ../KNBC_v1.0_090925/corpus1 -type f -name "KN*${genre}*" \
    | LC_ALL=C sort \
    | xargs cat \
    | python ../tools/knbc2kyoto.py KNP \
    | python ../tools/replace_pos.py mecab -d /usr/local/lib/mecab/dic/jumandic \
    > "knbc${fold}.euc"
  iconv -f EUC-JP -t UTF-8 "knbc${fold}.euc" > "knbc${fold}.utf8"
done

# Fold N trains on the other three genres (corpusN) and is tested on genre N
# (goldN). The concatenation order is kept exactly as originally written
# (a rotation starting after the held-out genre) because the order of the
# training data may influence the learned model.
cat knbc2.utf8 knbc3.utf8 knbc4.utf8 > corpus1
cat knbc1.utf8 > gold1
cat knbc3.utf8 knbc4.utf8 knbc1.utf8 > corpus2
cat knbc2.utf8 > gold2
cat knbc4.utf8 knbc1.utf8 knbc2.utf8 > corpus3
cat knbc3.utf8 > gold3
cat knbc1.utf8 knbc2.utf8 knbc3.utf8 > corpus4
cat knbc4.utf8 > gold4
# learning chunk and dep, then test
# For every fold: train into a fresh per-fold model directory using corpusN,
# then parse the held-out goldN text into resultN.
for fold in 1 2 3 4; do
  fold_model="model/knbc${fold}"
  train_set="corpus${fold}"

  # Earlier variant, left disabled (different -c value, shared model dir):
  # jdepp -t 0 -I 1 -c corpus$i -m model -- -t 1 -d 2 -c 0.00005 -i 40 -p

  # Empty the fold's model directory before training.
  if mkdir -p "$fold_model"; then
    rm -rf "$fold_model"/*
  fi

  # Same four jdepp invocations as the whole-corpus run, pointed at this
  # fold's training data and model directory.
  jdepp -t 0 -I 1 -c "$train_set" -m "$fold_model" -- -t 1 -d 2 -c 0.0008 -i 40 -p
  jdepp -t 3 -I 1 -c "$train_set" -m "$fold_model" -- -t 1 -d 2 -c 0.0008 -i 40 -p -- -s 0.02 -i 5 -t 1
  jdepp -t 0 -I 2 -c "$train_set" -m "$fold_model" -- -t 1 -d 2 -c 0.00005 -i 40 -p
  jdepp -t 3 -I 2 -c "$train_set" -m "$fold_model" -- -t 1 -d 2 -c 0.00005 -i 40 -p -- -- -s 0.005 -i 5 -t 1

  # Re-tag the held-out text with mecab and parse it with the fold's model.
  python ../tools/to_sent.py < "gold${fold}" \
    | mecab -d /usr/local/lib/mecab/dic/jumandic \
    | jdepp -m "$fold_model" > "result${fold}"
done

# Score each fold separately; eval.py's stderr is what gets stored in evalN.
for fold in 1 2 3 4; do
  python ../tools/eval.py "result${fold}" "gold${fold}" 2> "eval${fold}"
done

# Feed all per-fold evaluation files to the aggregation script.
cat eval[1-4] | python ../tools/eval_total.py
## learner = tinysvm
# all
# Whole-corpus recipe with an external svm_learn step: each jdepp -t 0 call
# is followed by svm_learn reading <model>.train and writing <model>, and
# then by the matching jdepp -t 3 call. Done once for `chunk` (-I 1) and
# once for `depnd.p0` (-I 2).
# NOTE(review): presumably -l 1 selects jdepp's SVM learner — confirm with
# `jdepp -h`.
svm_model=model/knbc

jdepp -t 0 -I 1 -l 1 -c corpus -m "$svm_model" -- -t 0
svm_learn -t 1 -d 2 -c 1 "$svm_model/chunk.train" "$svm_model/chunk"
jdepp -t 3 -I 1 -l 1 -c corpus -m "$svm_model" -- -t 0 -- -s 0.02 -i 5 -t 1

jdepp -t 0 -I 2 -l 1 -c corpus -m "$svm_model" -- -t 0
svm_learn -t 1 -d 2 -c 1 "$svm_model/depnd.p0.train" "$svm_model/depnd.p0"
jdepp -t 3 -I 2 -l 1 -c corpus -m "$svm_model" -- -t 0 -- -- -s 0.005 -i 5 -t 1

# Re-tag the training corpus with mecab and parse it with the new model.
python ../tools/to_sent.py < corpus \
  | mecab -d /usr/local/lib/mecab/dic/jumandic \
  | jdepp -m "$svm_model" > result
# cv
# Four-fold version of the TinySVM recipe: for each fold, train on corpusN
# into a fresh model/knbcN directory (jdepp -t 0, external svm_learn, then
# jdepp -t 3, for `chunk` and for `depnd.p0`), then parse goldN into resultN.
for i in 1 2 3 4; do
  fold_model="model/knbc${i}"
  # Empty the fold's model directory; :? aborts the rm if the name is empty.
  mkdir -p "$fold_model" && rm -rf "${fold_model:?}"/*

  # -l 1 is passed on every train/cache call, exactly as in the whole-corpus
  # TinySVM commands of this section; the original loop omitted it, so jdepp
  # was not told to use the learner that the svm_learn step assumes.
  # NOTE(review): confirm against `jdepp -h` that -l 1 is the SVM learner.
  jdepp -t 0 -I 1 -l 1 -c "corpus${i}" -m "$fold_model" -- -t 0
  svm_learn -t 1 -d 2 -c 1 "$fold_model/chunk.train" "$fold_model/chunk"
  jdepp -t 3 -I 1 -l 1 -c "corpus${i}" -m "$fold_model" -- -t 0 -- -s 0.02 -i 5 -t 1

  jdepp -t 0 -I 2 -l 1 -c "corpus${i}" -m "$fold_model" -- -t 0
  svm_learn -t 1 -d 2 -c 1 "$fold_model/depnd.p0.train" "$fold_model/depnd.p0"
  jdepp -t 3 -I 2 -l 1 -c "corpus${i}" -m "$fold_model" -- -t 0 -- -- -s 0.005 -i 5 -t 1

  # Re-tag the held-out text with mecab and parse it with the fold's model.
  python ../tools/to_sent.py < "gold${i}" \
    | mecab -d /usr/local/lib/mecab/dic/jumandic \
    | jdepp -m "$fold_model" > "result${i}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment