Created
March 13, 2013 01:52
-
-
Save ikegami-yukino/5148780 to your computer and use it in GitHub Desktop.
交差検定用スクリプト
(ファイルを行ごとにランダムに分割してトレーニング用コマンドと検定用コマンドを実行する)
N-fold cross-validation script: randomly shuffles the lines of a file, splits them into N parts, and executes the given training and testing commands for each part.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# cv.sh - N-fold cross-validation driver.
#
# Shuffles the lines of FILE, splits them into N folds of near-equal size
# (the first "line count mod N" folds receive one extra line), and then, for
# each fold in turn:
#   * writes that fold to ./testfile,
#   * writes the concatenation of all other folds to ./trainfile,
#   * runs the train command, then the test command.
#
# Usage: ./cv.sh FILE N TRAIN_COMMAND TEST_COMMAND
#
# Arguments:
#   FILE           line-oriented data file to split
#   N              number of folds; a positive integer no larger than the
#                  number of lines in FILE
#   TRAIN_COMMAND  command string; should read the file named "trainfile"
#   TEST_COMMAND   command string; should read the file named "testfile"
#
# The command strings are split on whitespace and executed directly; shell
# syntax inside them (pipes, redirections, quoting) is NOT interpreted.
#
# Exit status: 1 on usage or input errors, 0 otherwise. A failing train or
# test command does not stop the remaining folds.

# Check the number of parameters
if [ $# -ne 4 ]; then
  echo "usage: ./cv.sh [FILE] [division number] [train command] [test command]" 1>&2
  echo "The file for training is named as trainfile" 1>&2
  echo "The file for testing is named as testfile" 1>&2
  echo "for example:" 1>&2
  echo './cv.sh data 5 "opal trainfile train.model -" "opal - train.model testfile"' 1>&2
  exit 1
fi

file=$1         # filename
fold=$2         # division number
traincommand=$3
testcommand=$4

# Validate the inputs up front so we never split with a bogus chunk size.
if [[ ! -r "$file" ]]; then
  printf 'cv.sh: cannot read file: %s\n' "$file" >&2
  exit 1
fi
case $fold in
  '' | *[!0-9]*)
    printf 'cv.sh: division number must be a positive integer: %s\n' "$fold" >&2
    exit 1
    ;;
esac
fold=$((10#$fold)) # force base 10 so values such as "08" are not read as octal
if ((fold < 1)); then
  printf 'cv.sh: division number must be at least 1\n' >&2
  exit 1
fi

# Keep all intermediate files in a private directory: nothing already present
# in the current directory can be mistaken for a fold (or deleted), and the
# scratch data is removed on every exit path.
workdir=$(mktemp -d "${TMPDIR:-/tmp}/cv.XXXXXX") || {
  printf 'cv.sh: cannot create temporary directory\n' >&2
  exit 1
}
trap 'rm -rf -- "$workdir"' EXIT
shuffled=$workdir/shuffled

# Random sort of the given file: prefix each line with a random key, sort on
# the key, then strip it again. Input is redirected so that unusual file
# names are never parsed by awk as options or variable assignments.
awk 'BEGIN { srand() } { print rand() "\t" $0 }' < "$file" \
  | sort -n \
  | cut -f 2- > "$shuffled"

# Count lines of the shuffled copy rather than the input: awk terminates
# every record with a newline, so a final unterminated line is counted too.
line_count=$(($(wc -l < "$shuffled")))
if ((line_count < fold)); then
  printf 'cv.sh: %s has %d line(s); cannot split into %d parts\n' \
    "$file" "$line_count" "$fold" >&2
  exit 1
fi

# Split the shuffled file into exactly $fold consecutive chunks named
# fold.1 .. fold.N. Each output file is closed as soon as it is complete so
# that only one is open at a time.
awk -v k="$fold" -v n="$line_count" -v prefix="$workdir/fold." '
  function quota_for(i) { return int(n / k) + (i <= n % k ? 1 : 0) }
  BEGIN { idx = 1; quota = quota_for(idx) }
  {
    out = prefix idx
    print > out
    if (++written == quota) {
      close(out)
      idx++
      written = 0
      quota = quota_for(idx)
    }
  }
' < "$shuffled"

# Cross-validation
for ((i = 1; i <= fold; i++)); do
  train_parts=()
  for ((j = 1; j <= fold; j++)); do
    if ((j == i)); then
      cat -- "$workdir/fold.$j" > testfile
    else
      train_parts+=("$workdir/fold.$j")
    fi
  done

  if ((${#train_parts[@]} > 0)); then
    cat -- "${train_parts[@]}" > trainfile
  else
    # Only possible with a single fold: there is nothing left to train on.
    # Write an empty file instead of letting cat block on stdin.
    : > trainfile
  fi

  # Intentionally unquoted: the command strings are split into words here.
  # shellcheck disable=SC2086
  $traincommand
  # shellcheck disable=SC2086
  $testcommand
done

# Failures of the user commands are not treated as fatal.
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment