samehkamaleldin · February 9, 2016 12:18
diff --git a/build-model.sh b/build-model.sh
 #!/bin/sh
 # ------------------------------------------------------------------------------
 # FILE        | build-model.sh
 # AUTHOR      | sameh kamal
 # DESCRIPTION | build a new concatenative model
 # USAGE       | build-model.sh <db_name> <raw_data_dir>
 # REQUIRES    | GNU sed (in-place -i), java, <workspace>/aux/phonemize_cont.jar
 # ------------------------------------------------------------------------------

 # abort on the first failing command and on use of an unset variable
 set -eu

 if [ "$#" -lt 2 ]; then
    echo "usage: ${0##*/} <db_name> <raw_data_dir>" >&2
    exit 2
 fi

 db_name=$1                      # database name
 dt_dir=$2                       # raw data dir
 ws_dir="$HOME/tts-workspace"    # workspace dir ("~" does not expand in quotes)
 db_dir="$ws_dir/$db_name"       # database dir
 aux_dir="$ws_dir/aux"
 # the two locations below are kept as configuration; this script does not
 # read them yet
 phonemizer_continuous_dir="$HOME/univox/phonemizer-continuous"
 marytts_base_dir="$HOME/git/marytts"

 # drop_short_head FILE
 # delete the first line of FILE when it is empty or a single character
 drop_short_head() {
    dsh_first=$(head -n 1 "$1")
    if [ "${#dsh_first}" -le 1 ]; then
       sed -i '1d' "$1"
    fi
 }

 # create workspace and database directories if they do not exist
 if ! mkdir -p "$db_dir/text" "$db_dir/wav" "$db_dir/lab" "$db_dir/language"; then
    echo "> database dir doesn't exist. " >&2
    exit 1
 fi

 # copy raw data files to database directory
 # (plain sh has no recursive "**": text/wav files are taken from two
 #  directory levels below the raw data dir, lab files from one level below)
 cp -- "$dt_dir"/*/*/*.txt "$db_dir/text"
 cp -- "$dt_dir"/*/*/*.wav "$db_dir/wav"
 cp -- "$dt_dir"/*/*.lab   "$db_dir/lab"

 # -------------------------------------------------------------------
 # create unique words file
 # -------------------------------------------------------------------
 # put all sentences in one file in language directory; a blank line is
 # written before the contents of every input file
 awk 'FNR==1{print ""}1' "$db_dir"/text/*.txt > "$db_dir/language/text_ar.txt"

 # get unique words from all sentences and put them in one file
 tr -s '[:space:]' '\n' < "$db_dir/language/text_ar.txt" \
    | sort -u > "$db_dir/language/unique_words.txt"

 # remove the empty line and ',' from the head of file if they exist:
 # the blank separator lines sort first as one empty line, and a lone
 # punctuation token may follow it, so the head is checked twice
 drop_short_head "$db_dir/language/unique_words.txt"
 drop_short_head "$db_dir/language/unique_words.txt"

 # -------------------------------------------------------------------
 # replace SSIL and SIL with _ in labs files
 # -------------------------------------------------------------------
 # SSIL is replaced first so that it becomes a single _ (not S_)
 sed -i -e 's/SSIL/_/g' -e 's/SIL/_/g' "$db_dir"/lab/*

 # -------------------------------------------------------------------
 # run phonemizer on unique words
 # -------------------------------------------------------------------
 # run from the language directory so the relative paths resolve there
 cd "$db_dir/language"
 java -jar "$aux_dir/phonemize_cont.jar" ./unique_words.txt ./ar_phon_dict.ph
diff --git a/ilive-tts.md b/ilive-tts.md
diff --git a/set-env-vals.sh b/set-env-vals.sh
 #!/bin/sh
 # ------------------------------------------------------------------------------
 # FILE        | set-env-vals.sh
 # AUTHOR      | sameh kamal
 # DESCRIPTION | export the environment variables needed to build tts models
 # ------------------------------------------------------------------------------

 # assign first, then export both names in a single statement
 MARY_BASE_DIR=/home/sameh/work/ist
 PHONEMIZER_SCRIPTS_DIR=/home/sameh/work/ist/phonemizer
 export MARY_BASE_DIR PHONEMIZER_SCRIPTS_DIR
	#!/bin/sh
	# ------------------------------------------------------------------------------
	# FILE        | build-model.sh
	# AUTHOR      | sameh kamal
	# DESCRIPTION | build a new concatenative model
	# USAGE       | build-model.sh <db_name> <raw_data_dir>
	# REQUIRES    | GNU sed (in-place -i), java, <workspace>/aux/phonemize_cont.jar
	# ------------------------------------------------------------------------------

	# abort on the first failing command and on use of an unset variable
	set -eu

	if [ "$#" -lt 2 ]; then
	   echo "usage: ${0##*/} <db_name> <raw_data_dir>" >&2
	   exit 2
	fi

	db_name=$1                      # database name
	dt_dir=$2                       # raw data dir
	ws_dir="$HOME/tts-workspace"    # workspace dir ("~" does not expand in quotes)
	db_dir="$ws_dir/$db_name"       # database dir
	aux_dir="$ws_dir/aux"
	# the two locations below are kept as configuration; this script does not
	# read them yet
	phonemizer_continuous_dir="$HOME/univox/phonemizer-continuous"
	marytts_base_dir="$HOME/git/marytts"

	# drop_short_head FILE
	# delete the first line of FILE when it is empty or a single character
	drop_short_head() {
	   dsh_first=$(head -n 1 "$1")
	   if [ "${#dsh_first}" -le 1 ]; then
	      sed -i '1d' "$1"
	   fi
	}

	# create workspace and database directories if they do not exist
	if ! mkdir -p "$db_dir/text" "$db_dir/wav" "$db_dir/lab" "$db_dir/language"; then
	   echo "> database dir doesn't exist. " >&2
	   exit 1
	fi

	# copy raw data files to database directory
	# (plain sh has no recursive "**": text/wav files are taken from two
	#  directory levels below the raw data dir, lab files from one level below)
	cp -- "$dt_dir"/*/*/*.txt "$db_dir/text"
	cp -- "$dt_dir"/*/*/*.wav "$db_dir/wav"
	cp -- "$dt_dir"/*/*.lab   "$db_dir/lab"

	# -------------------------------------------------------------------
	# create unique words file
	# -------------------------------------------------------------------
	# put all sentences in one file in language directory; a blank line is
	# written before the contents of every input file
	awk 'FNR==1{print ""}1' "$db_dir"/text/*.txt > "$db_dir/language/text_ar.txt"

	# get unique words from all sentences and put them in one file
	tr -s '[:space:]' '\n' < "$db_dir/language/text_ar.txt" \
	   | sort -u > "$db_dir/language/unique_words.txt"

	# remove the empty line and ',' from the head of file if they exist:
	# the blank separator lines sort first as one empty line, and a lone
	# punctuation token may follow it, so the head is checked twice
	drop_short_head "$db_dir/language/unique_words.txt"
	drop_short_head "$db_dir/language/unique_words.txt"

	# -------------------------------------------------------------------
	# replace SSIL and SIL with _ in labs files
	# -------------------------------------------------------------------
	# SSIL is replaced first so that it becomes a single _ (not S_)
	sed -i -e 's/SSIL/_/g' -e 's/SIL/_/g' "$db_dir"/lab/*

	# -------------------------------------------------------------------
	# run phonemizer on unique words
	# -------------------------------------------------------------------
	# run from the language directory so the relative paths resolve there
	cd "$db_dir/language"
	java -jar "$aux_dir/phonemize_cont.jar" ./unique_words.txt ./ar_phon_dict.ph
	#!/bin/sh
	# ------------------------------------------------------------------------------
	# FILE        | set-env-vals.sh
	# AUTHOR      | sameh kamal
	# DESCRIPTION | export the environment variables needed to build tts models
	# ------------------------------------------------------------------------------

	# assign first, then export both names in a single statement
	MARY_BASE_DIR=/home/sameh/work/ist
	PHONEMIZER_SCRIPTS_DIR=/home/sameh/work/ist/phonemizer
	export MARY_BASE_DIR PHONEMIZER_SCRIPTS_DIR