Skip to content

Instantly share code, notes, and snippets.

@taroushirani
Created May 18, 2020 01:48
Show Gist options
  • Save taroushirani/3ec4e00c3d7fbb140fc58dd8321ffedf to your computer and use it in GitHub Desktop.
Save taroushirani/3ec4e00c3d7fbb140fc58dd8321ffedf to your computer and use it in GitHub Desktop.
nit_song070_00_svs_world
#!/bin/bash
# Recipe driver for the NIT-SONG070 database. This first section only defines
# default settings; the numbered stages further down consume them.
# NOTE(review): these defaults are presumably overridable from the command line
# (--stage, --stop_stage, --tag, ...) through parse_options.sh, which is sourced
# right after this section -- confirm against that helper.

# Absolute directory containing this script. Every expansion is quoted so a
# checkout path with spaces resolves correctly, and "&&" keeps pwd from
# reporting an unrelated directory when cd fails.
script_dir=$(cd "$(dirname "${BASH_SOURCE:-$0}")" && pwd)
# Root of the nnsvs checkout; the option parser and question file live under it.
NNSVS_ROOT="/content/nnsvs"
# Directory
# **CHANGE** this to your database path
nit_song070_root="/content/gdrive/HTS-demo_NIT-SONG070-F001"
# Speaker id; used in dump directory names and in the experiment name.
spk="nit_song070"
# Top-level directory for extracted features.
dumpdir=dump
# HTS-style question used for extracting musical/linguistic context from musicxml files
question_path="$NNSVS_ROOT/egs/kiritan_singing/00-svs-world/conf/jp_qst001_nnsvs.hed"
# Stages N with stage <= N <= stop_stage are executed.
stage=0
stop_stage=0
# exp tag
tag="" # tag for managing experiments.
# Load the shared command-line option parser; abort if it cannot be sourced.
# The path is quoted so an NNSVS_ROOT containing spaces still works.
. "$NNSVS_ROOT/utils/parse_options.sh" || exit 1
# Strict mode for the rest of the script:
#   -e           exit when a command fails
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
# NOTE(review): enabled only after parse_options.sh has been sourced,
# presumably because that helper is not written for -u/-e -- confirm before
# moving this line up.
set -euo pipefail
#######################################
# Run a command with shell tracing switched on, so the exact invocation is
# echoed to stderr before it executes.
# Arguments: $@ - the command followed by its arguments; passed through
#                 verbatim (no re-splitting or glob expansion).
# Outputs:   xtrace lines on stderr plus whatever the command itself prints.
# Returns:   the exit status of the command.
#######################################
function xrun () {
  local status=0
  set -x
  # "$@" must be quoted: a bare $@ would split arguments on whitespace and
  # expand glob characters. The status is captured so that tracing is always
  # switched off again and the caller still sees the command's real result.
  "$@" || status=$?
  set +x
  return "${status}"
}
# Partition names. Each one has a data/list/<name>.list utterance list and a
# sub-directory of the same name under the dump directories.
train_set="train_no_dev"
dev_set="dev"
eval_set="eval"

# Partitions that go through feature extraction / normalization, and the
# subset that is used for generation and synthesis.
datasets=("${train_set}" "${dev_set}" "${eval_set}")
testsets=("${dev_set}" "${eval_set}")

# Raw (org) and normalized (norm) feature roots for this speaker.
dump_org_dir="${dumpdir}/${spk}/org"
dump_norm_dir="${dumpdir}/${spk}/norm"
# Experiment name: the speaker id, with "_<tag>" appended when a tag is given.
# ${tag} is quoted inside the test so that a tag containing whitespace is
# handled as a single word instead of making the test itself error out.
if [[ -z "${tag}" ]]; then
  expname="${spk}"
else
  expname="${spk}_${tag}"
fi
# Models and generated outputs of this experiment are stored below this path.
expdir="exp/${expname}"
# Stage -1: clone the nnsvs_nit_song070 repository into downloads/nit_song070.
# Nothing is done (and nothing is printed) when that path already exists.
if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
  if ! [ -e downloads/nit_song070 ]; then
    echo "stage -1: Downloading data"
    mkdir -p downloads
    git clone https://github.com/taroushirani/nnsvs_nit_song070.git \
      downloads/nit_song070
  fi
fi
# Stage 0: run the data-preparation script of the downloaded checkout, copy
# its results into data/, and split the utterances into train/dev/eval lists.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  echo "stage 0: Data preparation"
  nit_song070=downloads/nit_song070
  # cd is checked on its own: inside "cd ... && git ..." a failing cd is
  # exempt from `set -e`, and the commands below would then append to
  # ./config.py and execute ./run.sh of the *current* directory.
  cd "${nit_song070}" || exit 1
  # Discard local modifications so config.py does not accumulate the lines
  # appended below on repeated runs.
  git checkout .
  echo "" >> config.py
  echo "nit_song070_root = \"$nit_song070_root\"" >> config.py
  ./run.sh
  cd -
  mkdir -p data/list
  # Copy the *contents* of each prepared tree into a pre-created destination.
  # A plain `cp -r src data/x` nests a second copy at data/x/x when data/x
  # already exists (i.e. on a re-run), and the find below would then list
  # every utterance twice.
  for typ in timelag duration acoustic; do
    mkdir -p "data/${typ}"
    cp -r "${PWD}/${nit_song070}/nit_song070_extra/${typ}/." "data/${typ}/"
  done
  echo "train/dev/eval split"
  # One utterance id per line: the base name of every wav file, sorted.
  find data/acoustic/ -type f -name "*.wav" -exec basename {} .wav \; \
    | sort > data/list/utt_list.txt
  # Ids containing "003_" form the eval set, ids containing "004_" the dev
  # set, and everything else is used for training.
  grep 003_ data/list/utt_list.txt > "data/list/${eval_set}.list"
  grep 004_ data/list/utt_list.txt > "data/list/${dev_set}.list"
  grep -v 003_ data/list/utt_list.txt | grep -v 004_ > "data/list/${train_set}.list"
fi
# Stage 1: extract features for every partition, fit one scaler per
# (in/out, model type) pair on the training partition, then normalize all
# partitions with those scalers.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  echo "stage 1: Feature generation"
  for s in "${datasets[@]}"; do
    nnsvs-prepare-features "utt_list=data/list/${s}.list" \
      "out_dir=${dump_org_dir}/${s}/" \
      "question_path=${question_path}"
  done

  # Compute normalization stats for each input/output
  mkdir -p "${dump_norm_dir}"
  for inout in "in" "out"; do
    # Inputs use a min-max scaler, outputs a standard scaler.
    case "${inout}" in
      in) scaler_class="sklearn.preprocessing.MinMaxScaler" ;;
      *) scaler_class="sklearn.preprocessing.StandardScaler" ;;
    esac
    for typ in timelag duration acoustic; do
      scaler_path="${dump_org_dir}/${inout}_${typ}_scaler.joblib"
      # The list of training feature files is handed over through a scratch
      # file in the working directory, removed again right after fitting.
      find "${dump_org_dir}/${train_set}/${inout}_${typ}" -name "*feats.npy" \
        > train_list.txt
      nnsvs-fit-scaler list_path=train_list.txt "scaler.class=${scaler_class}" \
        "out_path=${scaler_path}"
      rm -f train_list.txt
      # Keep a copy next to the normalized features; later stages read the
      # scalers from the norm directory.
      cp -v "${scaler_path}" "${dump_norm_dir}/${inout}_${typ}_scaler.joblib"
    done
  done

  # apply normalization
  for s in "${datasets[@]}"; do
    for inout in "in" "out"; do
      for typ in timelag duration acoustic; do
        nnsvs-preprocess-normalize "in_dir=${dump_org_dir}/${s}/${inout}_${typ}/" \
          "scaler_path=${dump_org_dir}/${inout}_${typ}_scaler.joblib" \
          "out_dir=${dump_norm_dir}/${s}/${inout}_${typ}/"
      done
    done
  done
fi
# Stage 2: train the time-lag model on the normalized train/dev features and
# write the result to $expdir/timelag.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  echo "stage 2: Training time-lag model"
  xrun nnsvs-train \
    "data.train_no_dev.in_dir=${dump_norm_dir}/${train_set}/in_timelag/" \
    "data.train_no_dev.out_dir=${dump_norm_dir}/${train_set}/out_timelag/" \
    "data.dev.in_dir=${dump_norm_dir}/${dev_set}/in_timelag/" \
    "data.dev.out_dir=${dump_norm_dir}/${dev_set}/out_timelag/" \
    model=timelag "train.out_dir=${expdir}/timelag"
fi
# Stage 3: train the phoneme duration model on the normalized train/dev
# features and write the result to $expdir/duration.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  echo "stage 3: Training phoneme duration model"
  xrun nnsvs-train \
    "data.train_no_dev.in_dir=${dump_norm_dir}/${train_set}/in_duration/" \
    "data.train_no_dev.out_dir=${dump_norm_dir}/${train_set}/out_duration/" \
    "data.dev.in_dir=${dump_norm_dir}/${dev_set}/in_duration/" \
    "data.dev.out_dir=${dump_norm_dir}/${dev_set}/out_duration/" \
    model=duration "train.out_dir=${expdir}/duration"
fi
# Stage 4: train the acoustic model on the normalized train/dev features and
# write the result to $expdir/acoustic.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  echo "stage 4: Training acoustic model"
  xrun nnsvs-train \
    "data.train_no_dev.in_dir=${dump_norm_dir}/${train_set}/in_acoustic/" \
    "data.train_no_dev.out_dir=${dump_norm_dir}/${train_set}/out_acoustic/" \
    "data.dev.in_dir=${dump_norm_dir}/${dev_set}/in_acoustic/" \
    "data.dev.out_dir=${dump_norm_dir}/${dev_set}/out_acoustic/" \
    model=acoustic "train.out_dir=${expdir}/acoustic"
fi
# NOTE: stage 5 does not generate waveforms. It only saves the neural nets'
# outputs for the dev and eval partitions, one run per model type.
if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
  echo "stage 5: Generation features from timelag/duration/acoustic models"
  for s in "${testsets[@]}"; do
    for typ in timelag duration acoustic; do
      checkpoint="${expdir}/${typ}/latest.pth"
      # Checkpoint file name without its directory; its stem (extension
      # stripped) becomes the last component of the output directory.
      name="${checkpoint##*/}"
      xrun nnsvs-generate "model.checkpoint=${checkpoint}" \
        "model.model_yaml=${expdir}/${typ}/model.yaml" \
        "out_scaler_path=${dump_norm_dir}/out_${typ}_scaler.joblib" \
        "in_dir=${dump_norm_dir}/${s}/in_${typ}/" \
        "out_dir=${expdir}/${typ}/predicted/${s}/${name%.*}/"
    done
  done
fi
# Stage 6: synthesize waveforms for the dev and eval partitions with the
# latest timelag/duration/acoustic checkpoints. Each partition is synthesized
# twice, once per label directory under data/acoustic/.
if [ "${stage}" -le 6 ] && [ "${stop_stage}" -ge 6 ]; then
  echo "stage 6: Synthesis waveforms"
  for s in "${testsets[@]}"; do
    for input in label_phone_score label_phone_align; do
      # label_phone_score runs with ground_truth_duration=false,
      # label_phone_align with ground_truth_duration=true.
      if [ "${input}" = label_phone_score ]; then
        ground_truth_duration=false
      else
        ground_truth_duration=true
      fi
      # Use the question file configured at the top of the script (the same
      # one stage 1 extracted features with) rather than a hard-coded
      # conf/ path relative to the working directory.
      xrun nnsvs-synthesis "question_path=${question_path}" \
        "timelag.checkpoint=${expdir}/timelag/latest.pth" \
        "timelag.in_scaler_path=${dump_norm_dir}/in_timelag_scaler.joblib" \
        "timelag.out_scaler_path=${dump_norm_dir}/out_timelag_scaler.joblib" \
        "timelag.model_yaml=${expdir}/timelag/model.yaml" \
        "duration.checkpoint=${expdir}/duration/latest.pth" \
        "duration.in_scaler_path=${dump_norm_dir}/in_duration_scaler.joblib" \
        "duration.out_scaler_path=${dump_norm_dir}/out_duration_scaler.joblib" \
        "duration.model_yaml=${expdir}/duration/model.yaml" \
        "acoustic.checkpoint=${expdir}/acoustic/latest.pth" \
        "acoustic.in_scaler_path=${dump_norm_dir}/in_acoustic_scaler.joblib" \
        "acoustic.out_scaler_path=${dump_norm_dir}/out_acoustic_scaler.joblib" \
        "acoustic.model_yaml=${expdir}/acoustic/model.yaml" \
        "utt_list=./data/list/${s}.list" \
        "in_dir=data/acoustic/${input}/" \
        "out_dir=${expdir}/synthesis/${s}/latest/${input}" \
        "ground_truth_duration=${ground_truth_duration}"
    done
  done
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment