Created
November 18, 2019 04:10
-
-
Save mzaksana/0a150d4ab9d372b1131cebcc65820a1e to your computer and use it in GitHub Desktop.
Data preparation command log
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cd ../../Program/ | |
ls | |
cd controllers/ | |
ls | |
cat Program.php | |
ls | |
cd /home/mza/Documents/Pro/IndoAcro/data/core/ | |
ls | |
cd .. | |
ls | |
cd base/ | |
ls | |
cd .. | |
ls | |
mkdir tmp-1 | |
cd tmp-1/ | |
l | |
scp -P 222 [email protected]:~/data/indoacro/tmp/* . | |
cd /var/www/html/IndoAcro/ | |
l | |
php index.php Program makeTable | |
88 | |
144 | |
ls | |
cd application/modules/CoreApi/per | |
cd application/modules/CoreApi/perl/l | |
cd application/modules/CoreApi/perl/ | |
ls | |
perl insertData.pl ~/Documents/Pro/IndoAcro/data/tmp-1/20180101-20180228.txt && perl insertData.pl ~/Documents/Pro/IndoAcro/data/tmp-1/20180701-20180829.txt && perl insertData.pl ~/Documents/Pro/IndoAcro/data/tmp-1/20180901-20181031.txt | |
ssh [email protected] -p 222 | |
mysql -u pro -p | |
ls | |
history | |
mysql -u pro -p | |
cd | |
cd Rains/ | |
cd SpeeachRC/ | |
ls | |
cd d | |
cd data/ | |
ls | |
cd mapping/ | |
ls | |
cp text.only corpus.txt | |
vim corpus.txt | |
cut corpus.txt -d' ' -f2- > tmp | |
cat tmp | |
mv tmp corpus.txt | |
cat tmp | |
cat corpus.txt | |
ls | |
cp corpus.txt vocab.txt | |
vim corpus.txt | |
vim vocab.txt | |
tr text.only | |
tr text.only "\s" "\n" | |
tr "\s" "\n" vocab.txt | |
cat vocab.txt | tr "\s" "\n" | |
cat vocab.txt | tr " " "\n" | |
cat vocab.txt | tr " " "\n" > vocab.txt | |
cat vocab.txt | |
cp corpus.txt vocab.txt | |
cat vocab.txt | tr " " "\n" > vocab.txt.d | |
cat vocab.txt | |
cat vocab.txt.d | |
mv vocab.txt.d vocab.txt | |
ls | |
cat vocab.txt | |
cat vocab.txt | sort aa | |
cat vocab.txt | sort > aa | |
mv aa vocab.txt | |
ls | |
cat vocab.txt | |
cat vocab.txt | sort | uniq> aa | |
mv aa vocab.txt | |
cat vocab.txt | |
l | |
ls | |
cat corpus.txt | |
l | |
cat vocab.txt | |
ls | |
mv vocab.txt vocab-full.txt | |
ls | |
cp corpus.txt vocab-full.txt ../../../Tools/xzkaldi/egs/zcommonvoice/s5/data/dataset/ | |
ls | |
cd Rains/Tools/ | |
ls | |
cd xzkaldi/ | |
l | |
cd egs/commonvoice/ | |
ls | |
cd s5/ | |
ls | |
ls exp/ | |
ls exp/make_mfcc/valid_train/ | |
cd .. | |
ls | |
cd s5/ | |
cat run.sh | |
vim run.sh | |
ls | |
cd data/ | |
ls | |
cd valid_train/ | |
l | |
cd .. | |
ls | |
rm -r lang lang_test/ local/ | |
sudo rm -r lang lang_test/ local/ | |
ls | |
clear | |
ls | |
mkdir local | |
cd local/ | |
l | |
mkdir dict | |
ls | |
cd dict/ | |
ls | |
source ../../../../../../tools/env.sh | |
cp ../../valid_train/text . | |
ls | |
head text | |
vim text | |
cut -d' ' text -f2- > corpus.txt | |
cat corpus.txt | |
ls | |
vim corpus.txt | |
ls | |
ngram-count -order 3 -write-vocab vocab-full.txt -wbdiscount -text corpus.txt -lm lm.gz | |
ls | |
gunzip -k lm.gz | |
vim lm | |
ls | |
cd .. | |
ls | |
svn co http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict cmudict | |
ls | |
cd cmudict/ | |
k | |
l | |
perl scripts/make_baseform.pl cmudict.0.7a /dev/stdout | sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' | tr '[A-Z]' '[a-z]' > cmudict-plain.txt | |
vim cmudict-plain.txt | |
vim cmudict.0.7a | |
cd ../dict/ | |
ls | |
awk 'NR==FNR{words[$1]; next;} !($1 in words)' \ | |
awk 'NR==FNR{words[$1]; next;} !($1 in words)' | |
awk 'NR==FNR{words[$1]; next;} !($1 in words)' ../cmudict/cmudict-plain.txt vocab-full.txt | egrep -v '<.?s>' > vocab-oov.txt | |
ls | |
vim vocab-oov.txt | |
ls | |
wget http://www.speech.cs.cmu.edu/tools/product/1574044008_32187/7626.dict -O lexicon-oov.txt | |
ls | |
vim lexicon-oov.txt | |
ls | |
awk 'NR==FNR{words[$1]; next;} ($1 in words)' vocab-full.txt ../cmudict/cmudict-plain.txt | egrep -v '<.?s>' > lexicon-iv.txt | |
l | |
cat lexicon-oov.txt lexicon-iv.txt | sort > lexicon.txt | |
echo SIL > silence_phones.txt | |
echo SIL > optional_silence.txt | |
grep -v -w sil lexicon.txt | awk '{for(n=2;n<=NF;n++) { p[$n]=1; }} END{for(x in p) {print x}}' | sort > nonsilence_phones.txt | |
l | |
echo -e "<unk>\tSIL" >> lexicon.txt | |
touch extra_questions.txt | |
vim lexicon.txt | |
vim nonsilence_phones.txt | |
cd .. | |
ls | |
cd ../.. | |
utils/prepare_lang.sh data/local/dict '<unk>' data/local/lang data/lang | |
ls | |
cd data/lang/ | |
l | |
vim phones.txt | |
ls | |
cd ../.. | |
l | |
cd conf/ | |
ls | |
vim mfcc.conf | |
vim mfcc_hires.conf | |
ls | |
vim mfcc_hires.conf | |
ls | |
cd .. | |
l | |
ls | |
steps/make_mfcc.sh | |
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 4 --cmd "run.pl" data/valid_test data/valid_test/log data/valid_test/feats | |
cd data/valid_test/ | |
ls | |
cd log/ | |
l | |
vim make_mfcc_valid_test.1.log | |
history | |
history | tail 2034 | |
history | tail -2034 | |
history | tail -200 > ~/Documents/Data/log | |
2---------------- | |
cut corpus.txt -d' ' -f2- > tmp | |
cat tmp | |
mv tmp corpus.txt | |
cat tmp | |
cat corpus.txt | |
ls | |
cp corpus.txt vocab.txt | |
vim corpus.txt | |
vim vocab.txt | |
tr text.only | |
tr text.only "\s" "\n" | |
tr "\s" "\n" vocab.txt | |
cat vocab.txt | tr "\s" "\n" | |
cat vocab.txt | tr " " "\n" | |
cat vocab.txt | tr " " "\n" > vocab.txt | |
cat vocab.txt | |
cp corpus.txt vocab.txt | |
cat vocab.txt | tr " " "\n" > vocab.txt.d | |
cat vocab.txt | |
cat vocab.txt.d | |
mv vocab.txt.d vocab.txt | |
ls | |
cat vocab.txt | |
cat vocab.txt | sort aa | |
cat vocab.txt | sort > aa | |
mv aa vocab.txt | |
ls | |
cat vocab.txt | |
cat vocab.txt | sort | uniq> aa | |
mv aa vocab.txt | |
cat vocab.txt | |
l | |
ls | |
cat corpus.txt | |
l | |
cat vocab.txt | |
ls | |
mv vocab.txt vocab-full.txt | |
ls | |
cp corpus.txt vocab-full.txt ../../../Tools/xzkaldi/egs/zcommonvoice/s5/data/dataset/ | |
ls | |
vim local/download_and_untar.sh | |
vim local/prepare_lm.sh | |
vim local/prepare_dict.sh |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment