AdolfVonKleist · April 9, 2014 07:16 · AdolfVonKleist · Apr 9, 2014
diff --git a/little-test.sh b/little-test.sh
 #!/bin/bash

 # Require exactly two arguments: <lexicon> and <order>.
 # The usage text goes to stderr and the script exits non-zero, so a caller
 # can tell a misuse apart from a successful run.
 if [ "$#" -ne 2 ]; then
    echo "USAGE: $0 <lexicon> <order>" >&2
    echo "  Recommended: order=6~9 for English" >&2
    exit 1
 fi

 #Triage the lexicon a bit to overcome my poor design
 cat ${1} \
    | perl -e'while(<>){
                 chomp;
                 @_ = split(/\s+/);
                 $w = shift(@_);
                 $p = join(" ",@_);
                 $p =~ s/_/,/g;
                 print $w."\t".$p."\n";
               }' \
    > ${1}.triaged.lex

 #Run the aligner with some reasonable params (for English)
 # Reads the triaged lexicon and writes the aligned corpus next to it.
 # The option values are quoted so a lexicon path with spaces stays one word.
 phonetisaurus-align --input="${1}.triaged.lex" --ofile="${1}.triaged.corpus" --seq1_del=false
 echo ""

 #Train an n-gram model
 # Note: You can use *any* tool that outputs a valid ARPA-format LM
 # Recommended: Interpolate a Kneser-Ney model with a MaxEnt model (latest SRILM),
 #   rescore with an RNNLM (if you want to get fancy)
 # If possible, results can be further improved by pruning pronunciations with
 # a further forced alignment step with your recognizer
 # ${2} is the n-gram order; it also appears in the output file name.
 estimate-ngram -o "${2}" -t "${1}.triaged.corpus" -s FixModKN -wl "${1}.triaged.${2}g.arpa"
 echo ""

 #Convert the model
 # Turns the ARPA file written by the training step into an .fst file with
 # the same base name. Paths are quoted to survive spaces in the lexicon path.
 phonetisaurus-arpa2wfst-omega --lm="${1}.triaged.${2}g.arpa" --ofile="${1}.triaged.${2}g.fst"
 echo ""

 #Test an input word, and fix the phoneme we triaged
 phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=A-bomb \
    | perl -e'while(<>){
                 chomp;
                 @_ = split(/\t/);
                 $_[2] =~ s/,/_/g;
                 print join("\t",@_)."\n";
              }'
 echo ""

 #Test a list of input words, get the 5-best, and fixe the phoneme we triaged
 for w in A-frame A-line; do echo $w; done > short.wlist

 phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=short.wlist --isfile=true \
    --nbest=5 --decoder_type=fst_phi \
    | perl -e'while(<>){
                 chomp;
                 @_ = split(/\t/);
                 $_[2] =~ s/,/_/g;
                 print join("\t",@_)."\n";
              }'

 echo ""
	#!/bin/bash

	# Require exactly two arguments: <lexicon> and <order>.
	# The usage text goes to stderr and the script exits non-zero, so a caller
	# can tell a misuse apart from a successful run.
	if [ "$#" -ne 2 ]; then
	   echo "USAGE: $0 <lexicon> <order>" >&2
	   echo " Recommended: order=6~9 for English" >&2
	   exit 1
	fi

	#Triage the lexicon a bit to overcome my poor design
	cat ${1} \
	\| perl -e'while(<>){
	chomp;
	@_ = split(/\s+/);
	$w = shift(@_);
	$p = join(" ",@_);
	$p =~ s/_/,/g;
	print $w."\t".$p."\n";
	}' \
	> ${1}.triaged.lex

	#Run the aligner with some reasonable params (for English)
	# Reads the triaged lexicon and writes the aligned corpus next to it.
	# The option values are quoted so a lexicon path with spaces stays one word.
	phonetisaurus-align --input="${1}.triaged.lex" --ofile="${1}.triaged.corpus" --seq1_del=false
	echo ""

	#Train an n-gram model
	# Note: You can use any tool that outputs a valid ARPA-format LM
	# Recommended: Interpolate a Kneser-Ney model with a MaxEnt model (latest SRILM),
	# rescore with an RNNLM (if you want to get fancy)
	# If possible, results can be further improved by pruning pronunciations with
	# a further forced alignment step with your recognizer
	# ${2} is the n-gram order; it also appears in the output file name.
	estimate-ngram -o "${2}" -t "${1}.triaged.corpus" -s FixModKN -wl "${1}.triaged.${2}g.arpa"
	echo ""

	#Convert the model
	# Turns the ARPA file written by the training step into an .fst file with
	# the same base name. Paths are quoted to survive spaces in the lexicon path.
	phonetisaurus-arpa2wfst-omega --lm="${1}.triaged.${2}g.arpa" --ofile="${1}.triaged.${2}g.fst"
	echo ""

	#Test an input word, and fix the phoneme we triaged
	phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=A-bomb \
	\| perl -e'while(<>){
	chomp;
	@_ = split(/\t/);
	$_[2] =~ s/,/_/g;
	print join("\t",@_)."\n";
	}'
	echo ""

	#Test a list of input words, get the 5-best, and fixe the phoneme we triaged
	for w in A-frame A-line; do echo $w; done > short.wlist

	phonetisaurus-g2p-omega --model=${1}.triaged.${2}g.fst --input=short.wlist --isfile=true \
	--nbest=5 --decoder_type=fst_phi \
	\| perl -e'while(<>){
	chomp;
	@_ = split(/\t/);
	$_[2] =~ s/,/_/g;
	print join("\t",@_)."\n";
	}'

	echo ""
No results found