@wasertech
Created January 22, 2023 23:18
NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for /mnt/checkpoints/best_dev-221133
[ figlet banner: "Training Wizard" ]
[ STT ]
Assistant: Trainer directory has been correctly setup.
: Before we start training, we need a few info about your models.
[?] : Enter a username [waser]:
[?] : Enter your contact info [[email protected]]:
[?] : Enter models version [0.0.1]:
(i) : Spawning training container
+ dirname /home/trainer/run.sh
+ THIS=/home/trainer
+ export PATH=/home/trainer:/home/trainer/fr_custom:/home/trainer/kenlm/build/bin/:/home/trainer/stt-train/bin:/home/trainer/stt-train/bin:/home/trainer/tf-venv/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin
+ export TF_CUDNN_RESET_RND_GEN_STATE=1
+ env
DALI_BUILD=3728186
ENGLISH_COMPATIBLE=0
OMPI_MCA_pml=^ucx
LIBRARY_PATH=/usr/local/cuda/lib64/stubs:
CV_PERSONAL_SECOND_URL=
PYTHONIOENCODING=utf-8
LOG_LEVEL=1
LANGUAGE=fr
TF_ADJUST_SATURATION_FUSED=1
CC_DIR=/home/trainer/cc
CUSOLVER_VERSION=11.3.2.55
HOSTNAME=4f7b85f34bae
TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT=1
CUTENSOR_VERSION=1.4.0.6
DLPROF_VERSION=
LM_TOP_K=500000
SHLVL=0
LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
HOME=/home/trainer
TENSORFLOW_VERSION=1.15.5
DROPOUT=0.3
SKIP_BATCH_TEST=1
CUDA_CACHE_DISABLE=1
PYVER=3.8
DUPLICATE_SENTENCE_COUNT=1
BAZELRC=/root/.bazelrc
ENV=/etc/shinit_v2
TRAIN_BATCH_SIZE=64
RDMACORE_VERSION=36.0
ENABLE_AUGMENTS=0
NVJPEG_VERSION=11.6.0.55
NVIDIA_BUILD_ID=32060646
CUDA_VERSION=11.6.0.021
NVM_DIR=/usr/local/nvm
CUBLAS_VERSION=11.8.1.74
NSIGHT_SYSTEMS_VERSION=2021.5.2.53
AMP=0
MODEL_LANGUAGE=fr_custom
OPAL_PREFIX=/opt/hpcx/ompi
NVIDIA_REQUIRE_CUDA=cuda>=9.0
N_HIDDEN=2048
TRT_VERSION=8.2.3.0+cuda11.4.2.006
GDRCOPY_VERSION=2.3
_=/home/trainer/run.sh
NVIDIA_DRIVER_CAPABILITIES=compute,utility,video
CURAND_VERSION=10.2.9.55
MOFED_VERSION=5.4-rdmacore36.0
BEAM_WIDTH=500
TERM=xterm
TF_CUDNN_RESET_RND_GEN_STATE=1
LEARNING_RATE=0.0001
NVIDIA_TENSORFLOW_VERSION=22.02-tf1
PATH=/home/trainer:/home/trainer/fr_custom:/home/trainer/kenlm/build/bin/:/home/trainer/stt-train/bin:/home/trainer/stt-train/bin:/home/trainer/tf-venv/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin
GID=1018
JUPYTER_PORT=8888
CUDA_DRIVER_VERSION=510.39.01
_CUDA_COMPAT_STATUS=System has unsupported display driver / cuda driver combination (CUDA_ERROR_SYSTEM_DRIVER_MISMATCH) cuInit()=803
MODEL_VERSION=0.0.1
LOCATION=CH
NVIDIA_PRODUCT_NAME=TensorFlow
LANG=C.UTF-8
NPP_VERSION=11.6.0.55
LM_ADD_EXCLUDED_MAX_SEC=1
TENSORBOARD_PORT=6006
DEV_BATCH_SIZE=64
STT_DIR=/home/trainer/stt
[email protected]
CUFFT_VERSION=10.7.0.55
AUTHOR_USERNAME=waser
TF_ENABLE_WINOGRAD_NONFUSED=1
VIRTUAL_ENV_NAME=stt-train
CUDNN_VERSION=8.3.2.44+cuda11.5
NSIGHT_COMPUTE_VERSION=2022.1.0.12
DALI_VERSION=1.10.0
UID=1018
DEBIAN_FRONTEND=noninteractive
SHELL=/bin/bash
CV_PERSONAL_FIRST_URL=
LM_EVALUATE_RANGE=2,4,50
OPENMPI_VERSION=4.1.2rc4
TEST_BATCH_SIZE=64
TRTOSS_VERSION=22.02
OMPI_MCA_coll_hcoll_enable=0
LM_ALPHA=0.0
CUSPARSE_VERSION=11.7.1.55
VIRTUAL_ENV=/home/trainer/stt-train
LM_BETA=0.0
BASH_ENV=/etc/bash.bashrc
STT_SHA1=fcec06bdd89f6ae68e2599495e8471da5e5ba45e
EPOCHS=40
PWD=/home/trainer
LC_ALL=C.UTF-8
EARLY_STOP=1
HOMEDIR=/home/trainer
_CUDA_COMPAT_PATH=/usr/local/cuda/compat
TF_ADJUST_HUE_FUSED=1
NVIDIA_VISIBLE_DEVICES=all
NCCL_VERSION=2.11.4
STT_BRANCH=fcec06bdd89f6ae68e2599495e8471da5e5ba45e
OPENUCX_VERSION=1.12.0
HPCX_VERSION=2.10
TENSORBOARD_DEBUGGER_PORT=6064
TF_AUTOTUNE_THRESHOLD=2
+ checks.sh
+ sudo id
uid=0(root) gid=0(root) groups=0(root)
+ sudo /sbin/ldconfig
+ nvidia-smi
Sun Jan 22 23:17:08 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.60.11 Driver Version: 525.60.11 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:43:00.0 On | N/A |
| 0% 35C P8 19W / 170W | 1863MiB / 12288MiB | 2% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
+-----------------------------------------------------------------------------+
++ find /mnt/ -maxdepth 1 -type d
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/ ...'
Checking /mnt/ ...
+ '[' '!' -w /mnt/ ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/lm ...'
Checking /mnt/lm ...
+ '[' '!' -w /mnt/lm ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/sources ...'
Checking /mnt/sources ...
+ '[' '!' -w /mnt/sources ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/extracted ...'
Checking /mnt/extracted ...
+ '[' '!' -w /mnt/extracted ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/tmp ...'
Checking /mnt/tmp ...
+ '[' '!' -w /mnt/tmp ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/helpers ...'
Checking /mnt/helpers ...
+ '[' '!' -w /mnt/helpers ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/models ...'
Checking /mnt/models ...
+ '[' '!' -w /mnt/models ']'
+ for dir in $(find /mnt/ -maxdepth 1 -type d)
+ echo 'Checking /mnt/checkpoints ...'
Checking /mnt/checkpoints ...
+ '[' '!' -w /mnt/checkpoints ']'
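The xtrace above shows `checks.sh` walking the first-level directories under `/mnt/` and verifying each is writable. A minimal sketch of that loop (a reconstruction from the trace; the real script's error handling and messages are assumptions):

```shell
#!/bin/sh
# Sketch of the mount-point writability check seen in the trace above.
# MNT_ROOT stands in for /mnt/; the error message wording is an assumption.
MNT_ROOT="${MNT_ROOT:-/mnt/}"
for dir in $(find "$MNT_ROOT" -maxdepth 1 -type d); do
    echo "Checking $dir ..."
    if [ ! -w "$dir" ]; then
        echo "Error: $dir is not writable by $(id -un)." >&2
        exit 1
    fi
done
```

In this run every directory passes, so the trace shows only the `Checking ...` lines with no error output.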
+ for subdir in sources extracted checkpoints models lm tmp
+ '[' '!' -d /mnt/sources/fr ']'
+ for subdir in sources extracted checkpoints models lm tmp
+ '[' '!' -d /mnt/extracted/fr ']'
+ for subdir in sources extracted checkpoints models lm tmp
+ '[' '!' -d /mnt/checkpoints/fr ']'
+ for subdir in sources extracted checkpoints models lm tmp
+ '[' '!' -d /mnt/models/fr ']'
+ for subdir in sources extracted checkpoints models lm tmp
+ '[' '!' -d /mnt/lm/fr ']'
+ for subdir in sources extracted checkpoints models lm tmp
+ '[' '!' -d /mnt/tmp/fr ']'
+ mkdir /mnt/extracted/fr/data/
mkdir: cannot create directory ‘/mnt/extracted/fr/data/’: File exists
+ true
+ python -c 'import tensorflow as tf; tf.test.is_gpu_available()'
2023-01-22 23:17:08.995787: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
2023-01-22 23:17:10.088028: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3494740000 Hz
2023-01-22 23:17:10.090313: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x23069b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-01-22 23:17:10.090353: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2023-01-22 23:17:10.093420: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-01-22 23:17:10.149360: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2271bb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-01-22 23:17:10.149393: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2023-01-22 23:17:10.149834: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1666] Found device 0 with properties:
name: NVIDIA GeForce RTX 3060 major: 8 minor: 6 memoryClockRate(GHz): 1.777
pciBusID: 0000:43:00.0
2023-01-22 23:17:10.149866: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-01-22 23:17:10.162553: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-01-22 23:17:10.189232: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-01-22 23:17:10.190587: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-01-22 23:17:10.196758: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-01-22 23:17:10.210663: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-01-22 23:17:10.210794: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-01-22 23:17:10.211126: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1794] Adding visible gpu devices: 0
2023-01-22 23:17:10.211359: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-01-22 23:17:11.094163: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1206] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-01-22 23:17:11.094205: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1212] 0
2023-01-22 23:17:11.094212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1225] 0: N
2023-01-22 23:17:11.094849: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1351] Created TensorFlow device (/device:GPU:0 with 8156 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:43:00.0, compute capability: 8.6)
+ pushd /home/trainer/stt
~/stt ~
+ ./bin/run-ci-ldc93s1_new.sh 2 16000
+ ldc93s1_dir=./data/smoke_test
+ ldc93s1_csv=./data/smoke_test/ldc93s1.csv
+ epoch_count=2
+ audio_sample_rate=16000
+ [ ! -f ./data/smoke_test/ldc93s1.csv ]
+ export CUDA_VISIBLE_DEVICES=0
+ python -u train.py --alphabet_config_path data/alphabet.txt --show_progressbar false --early_stop false --train_files ./data/smoke_test/ldc93s1.csv --train_batch_size 1 --feature_cache /tmp/ldc93s1_cache --dev_files ./data/smoke_test/ldc93s1.csv --dev_batch_size 1 --test_files ./data/smoke_test/ldc93s1.csv --test_batch_size 1 --n_hidden 100 --epochs 2 --max_to_keep 1 --checkpoint_dir /tmp/ckpt --learning_rate 0.001 --dropout_rate 0.05 --export_dir /tmp/train --scorer_path data/smoke_test/pruned_lm.scorer --audio_sample_rate 16000 --export_tflite false
Using the top level train.py script is deprecated and will be removed in a future release. Instead use: python -m coqui_stt_training.train
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
I Performing dummy training to check for memory problems.
I If the following process crashes, you likely have batch sizes that are too big for your available system memory (or GPU memory).
I Could not find best validating checkpoint.
I Could not find most recent checkpoint.
I Initializing all variables.
I STARTING Optimization
I Training epoch 0...
I Finished training epoch 0 - loss: 327.651917
I Validating epoch 0 on ./data/smoke_test/ldc93s1.csv...
I Finished validating epoch 0 on ./data/smoke_test/ldc93s1.csv - loss: 286.126099
--------------------------------------------------------------------------------
I FINISHED optimization in 0:00:03.934428
I Dummy run finished without problems, now starting real training process.
I STARTING Optimization
I Training epoch 0...
I Finished training epoch 0 - loss: 327.651917
I Validating epoch 0 on ./data/smoke_test/ldc93s1.csv...
I Finished validating epoch 0 on ./data/smoke_test/ldc93s1.csv - loss: 286.126129
I Saved new best validating model with loss 286.126129 to: /tmp/ckpt/best_dev-1
--------------------------------------------------------------------------------
I Training epoch 1...
I Finished training epoch 1 - loss: 288.629486
I Validating epoch 1 on ./data/smoke_test/ldc93s1.csv...
I Finished validating epoch 1 on ./data/smoke_test/ldc93s1.csv - loss: 244.849197
I Saved new best validating model with loss 244.849197 to: /tmp/ckpt/best_dev-2
--------------------------------------------------------------------------------
I FINISHED optimization in 0:00:01.952360
W Specifying --test_files when calling train module. Use python -m coqui_stt_training.evaluate Using the training module as a generic driver for all training related functionality is deprecated and will be removed soon. Use the specific modules:
W python -m coqui_stt_training.train
W python -m coqui_stt_training.evaluate
W python -m coqui_stt_training.export
W python -m coqui_stt_training.training_graph_inference
I Loading best validating checkpoint from /tmp/ckpt/best_dev-2
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: global_step
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
Testing model on ./data/smoke_test/ldc93s1.csv
I Test epoch...
Test on ./data/smoke_test/ldc93s1.csv - WER: 1.000000, CER: 0.788462, loss: 244.849197
--------------------------------------------------------------------------------
Best WER:
--------------------------------------------------------------------------------
WER: 1.000000, CER: 0.788462, loss: 244.849197
- wav: data/smoke_test/LDC93S1.wav
- src: "she had your dark suit in greasy wash water all year"
- res: "assistance assistance"
--------------------------------------------------------------------------------
Median WER:
--------------------------------------------------------------------------------
WER: 1.000000, CER: 0.788462, loss: 244.849197
- wav: data/smoke_test/LDC93S1.wav
- src: "she had your dark suit in greasy wash water all year"
- res: "assistance assistance"
--------------------------------------------------------------------------------
Worst WER:
--------------------------------------------------------------------------------
WER: 1.000000, CER: 0.788462, loss: 244.849197
- wav: data/smoke_test/LDC93S1.wav
- src: "she had your dark suit in greasy wash water all year"
- res: "assistance assistance"
--------------------------------------------------------------------------------
W Specifying --export_dir when calling train module. Use python -m coqui_stt_training.export Using the training module as a generic driver for all training related functionality is deprecated and will be removed soon. Use the specific modules:
W python -m coqui_stt_training.train
W python -m coqui_stt_training.evaluate
W python -m coqui_stt_training.export
W python -m coqui_stt_training.training_graph_inference
I Exporting the model...
I Loading best validating checkpoint from /tmp/ckpt/best_dev-2
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
I Loading variable from checkpoint: layer_1/bias
I Loading variable from checkpoint: layer_1/weights
I Loading variable from checkpoint: layer_2/bias
I Loading variable from checkpoint: layer_2/weights
I Loading variable from checkpoint: layer_3/bias
I Loading variable from checkpoint: layer_3/weights
I Loading variable from checkpoint: layer_5/bias
I Loading variable from checkpoint: layer_5/weights
I Loading variable from checkpoint: layer_6/bias
I Loading variable from checkpoint: layer_6/weights
I Models exported at /tmp/train
I Model metadata file saved to /tmp/train/author_model_0.0.1.md. Before submitting the exported model for publishing make sure all information in the metadata file is correct, and complete the URL fields.
+ popd
~
+ export TMP=/mnt/tmp
+ export TEMP=/mnt/tmp
+ . params.sh
+ set -xe
+ export IMPORTERS_VALIDATE_LOCALE=
+ export CV_RELEASE_FILENAME=
+ export CV_RELEASE_SHA256=
+ export LINGUA_LIBRE_QID=
+ export LINGUA_LIBRE_ISO639=
+ export LINGUA_LIBRE_ENGLISH=
+ export LINGUA_LIBRE_SKIPLIST=
+ export M_AILABS_LANG=
+ export M_AILABS_SKIP=
+ export LM_ICONV_LOCALE=
+ export MODEL_EXPORT_SHORT_LANG=
+ export MODEL_EXPORT_LONG_LANG=
+ export MODEL_EXPORT_ZIP_LANG=
+ . fr_custom/params.sh
+ set -xe
+ export IMPORTERS_VALIDATE_LOCALE=--validate_label_locale /home/trainer/validate_label.py
+ export LINGUA_LIBRE_QID=21
+ export LINGUA_LIBRE_ISO639=fra
+ export LINGUA_LIBRE_ENGLISH=French
+ export LINGUA_LIBRE_SKIPLIST=/home/trainer/fr_custom/lingua_libre_skiplist.txt
+ export M_AILABS_LANG=fr_FR
+ export M_AILABS_SKIP=monsieur_lecoq,les_mysteres_de_paris
+ export LM_ICONV_LOCALE=fr_FR.UTF-8
+ export MODEL_EXPORT_ZIP_LANG=fr-custom
+ [ -x fr_custom/metadata.sh ]
+ . fr_custom/metadata.sh
+ set -xe
+ export METADATA_AUTHOR=
+ export METADATA_MODEL_NAME=STT-fr-waser
+ export METADATA_MODEL_VERSION=0.0.1
+ export [email protected]
+ export METADATA_LICENSE=MIT-0
+ export METADATA_LANGUAGE=fr-CH
+ export METADATA_MIN_STT_VERSION=0.7
+ export METADATA_MAX_STT_VERSION=1.9
+ export METADATA_DESCRIPTION=<on tail models for french STT fine-tuned for waser>
+ cd fr_custom
+ importers.sh
+ [ -f /transfer-checkpoint/checkpoint -a ! -f /mnt/models/fr/output_graph.tflite -a ! -z -a ! -z ]
+ ./import_mls.sh
+ pushd /home/trainer/stt
~/stt ~/fr_custom
+ '[' '!' -f /mnt/extracted/fr/mls_lm.txt ']'
+ popd
~/fr_custom
+ ../import_assistant.sh
+ '[' '!' -f /mnt/helpers/.stt.export.lm.management ']'
+ '[' 1 '!=' 1 ']'
+ '[' '!' -f /mnt/extracted/fr/_assistant_lm_.txt ']'
+ mkdir -p /mnt/helpers
+ python /home/trainer/import_assistant.py --lang fr --filter_alphabet /mnt/models/fr/alphabet.txt --normalize --validate_label_locale /home/trainer/validate_label.py /mnt/STT/helpers/.stt.export.mangement
+ cd ..
+ generate_alphabet.sh
+ pushd /home/trainer/stt
~/stt ~
++ find /mnt/extracted/fr/data/ -type f -name '*train.csv' -printf %p,
++ sed -e 's/,$//g'
+ all_train_csv=
++ find /mnt/extracted/fr/data/ -type f -name '*dev.csv' -printf %p,
++ sed -e 's/,$//g'
+ all_dev_csv=
++ find /mnt/extracted/fr/data/ -type f -name '*test.csv' -printf %p,
++ sed -e 's/,$//g'
+ all_test_csv=/mnt/extracted/fr/data/Assistant/train_test.csv
++ joinByChar , /mnt/extracted/fr/data/Assistant/train_test.csv
++ local IFS=,
++ shift
++ echo /mnt/extracted/fr/data/Assistant/train_test.csv
+ csv_files=/mnt/extracted/fr/data/Assistant/train_test.csv
+ mkdir -p /mnt/models/fr
+ '[' '!' -f /mnt/models/fr/alphabet.txt ']'
+ popd
~
+ build_lm.sh
+ '[' 0 = 1 ']'
+ pushd /mnt/extracted/
/mnt/extracted ~
+ /home/trainer/fr_custom/prepare_lm.sh
+ '[' '!' -f fr/wiki_fr_lower.txt ']'
+ '[' '!' -f debats-assemblee-nationale.txt ']'
+ curl -sSL https://github.com/Common-Voice/commonvoice-fr/releases/download/lm-0.1/debats-assemblee-nationale.txt.xz
+ pixz -d
+ tr '[:upper:]' '[:lower:]'
can not seek in input: Illegal seek
+ '[' 0 = 1 ']'
+ '[' 1 = 1 ']'
+ '[' '!' -f fr/excluded_max_sec_lm.txt ']'
+ '[' 1 = 1 ']'
+ '[' -f fr/excluded_max_sec_lm.txt ']'
+ EXCLUDED_LM_SOURCE=fr/excluded_max_sec_lm.txt
+ '[' '!' -f sources_lm.txt ']'
+ cat fr/wiki_fr_lower.txt fr/debats-assemblee-nationale.txt fr/mls_lm.txt fr/excluded_max_sec_lm.txt
+ sed -e 's/<s>/ /g'
+ '[' '!' -f fr/sources_lm.txt ']'
+ popd
~
+ pushd /home/trainer/stt
~/stt ~
+ mkdir -p /mnt/lm/fr
+ '[' '!' -f /mnt/lm/fr/lm.binary ']'
+ ./generate_scorer_package --checkpoint /mnt/models/fr/ --lm /mnt/lm/fr/lm.binary --vocab /mnt/lm/fr/vocab-500000.txt --package /mnt/lm/fr/kenlm.scorer --default_alpha 0.0 --default_beta 0.0
500000 unique words read from vocabulary file.
Doesn't look like a character based (Bytes Are All You Need) model.
--force_bytes_output_mode was not specified, using value infered from vocabulary contents: false
Package created in /mnt/lm/fr/kenlm.scorer.
+ popd
~
+ '[' 0 = 1 ']'
+ train.sh
+ pushd /home/trainer/stt
~/stt ~
++ find /mnt/extracted/fr/data/ -type f -name '*train.csv' -printf '%p '
++ sed -e 's/ $//g'
+ all_train_csv=
++ find /mnt/extracted/fr/data/ -type f -name '*dev.csv' -printf '%p '
++ sed -e 's/ $//g'
+ all_dev_csv=
++ find /mnt/extracted/fr/data/ -type f -name '*test.csv' -printf '%p '
++ sed -e 's/ $//g'
+ all_test_csv=/mnt/extracted/fr/data/Assistant/train_test.csv
+ [[ -z '' ]]
+ echo 'No data for training.'
No data for training.
+ exit 0
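Note why training is skipped here: the only CSV under `/mnt/extracted/fr/data/` is `Assistant/train_test.csv`, whose name matches the `*test.csv` glob but not `*train.csv`, so `all_train_csv` comes back empty and `train.sh` exits early. A sketch of that collection logic (reconstructed from the trace):

```shell
# Sketch of how train.sh gathers its CSV lists (paths taken from the trace).
# A file named train_test.csv matches '*test.csv' but NOT '*train.csv',
# so with only that file present, training is skipped.
DATA_DIR="${DATA_DIR:-/mnt/extracted/fr/data/}"
all_train_csv=$(find "$DATA_DIR" -type f -name '*train.csv' -printf '%p ' | sed -e 's/ $//g')
if [ -z "$all_train_csv" ]; then
    echo 'No data for training.'
    exit 0
fi
```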
+ evaluate_lm.sh
+ pushd /mnt/lm/fr
/mnt/lm/fr ~
+ '[' '!' -f /mnt/checkpoints/fr/best_dev_checkpoint -a -f /transfer-checkpoint/best_dev_checkpoint ']'
+ LOAD_CHECKPOINT_FROM='--checkpoint_dir /transfer-checkpoint'
++ find /mnt/extracted/fr/data/ -type f -name '*test.csv' -printf '%p '
++ sed -e 's/ $//g'
+ all_test_csv=/mnt/extracted/fr/data/Assistant/train_test.csv
+ '[' -z 2,4,50 ']'
+ '[' '!' -z 2,4,50 -a '!' -f /mnt/lm/fr/opt_lm.yml ']'
++ echo 2,4,50
++ cut -d, -f1
+ LM_ALPHA_MAX=2
++ echo 2,4,50
++ cut -d, -f2
+ LM_BETA_MAX=4
++ echo 2,4,50
++ cut -d, -f3
+ LM_N_TRIALS=50
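The `LM_EVALUATE_RANGE=2,4,50` variable from the environment dump is split here into the three optimizer parameters (alpha max, beta max, trial count). The trace shows this done with `cut`:

```shell
# Splitting LM_EVALUATE_RANGE into the lm_optimizer parameters, as in the trace.
LM_EVALUATE_RANGE="2,4,50"
LM_ALPHA_MAX=$(echo "$LM_EVALUATE_RANGE" | cut -d, -f1)   # --lm_alpha_max
LM_BETA_MAX=$(echo "$LM_EVALUATE_RANGE" | cut -d, -f2)    # --lm_beta_max
LM_N_TRIALS=$(echo "$LM_EVALUATE_RANGE" | cut -d, -f3)    # --n_trials
echo "$LM_ALPHA_MAX $LM_BETA_MAX $LM_N_TRIALS"
```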
+ python -u /home/trainer/lm_optimizer.py --show_progressbar true --train_cudnn true --alphabet_config_path /mnt/models/fr/alphabet.txt --scorer_path /mnt/lm/fr/kenlm.scorer --feature_cache /mnt/sources/fr/feature_cache --test_files /mnt/extracted/fr/data/Assistant/train_test.csv --test_batch_size 64 --n_hidden 2048 --lm_alpha_max 2 --lm_beta_max 4 --n_trials 50 --checkpoint_dir /transfer-checkpoint
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
[I 2023-01-22 23:18:04,503] A new study created in memory with name: no-name-0f421b63-297c-468c-b30d-8aa59857a843
/home/trainer/stt/training/coqui_stt_training/util/lm_optimize.py:30: FutureWarning: suggest_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.
Config.lm_alpha = trial.suggest_uniform("lm_alpha", 0, Config.lm_alpha_max)
/home/trainer/stt/training/coqui_stt_training/util/lm_optimize.py:31: FutureWarning: suggest_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.
Config.lm_beta = trial.suggest_uniform("lm_beta", 0, Config.lm_beta_max)
I Loading best validating checkpoint from /mnt/checkpoints/best_dev-221133
W Checkpoint loading failed due to missing tensors, retrying with --load_cudnn true - You should specify this flag whenever loading a checkpoint that was created with --train_cudnn true in an environment that has CuDNN disabled.
[W 2023-01-22 23:18:05,201] Trial 0 failed with parameters: {'lm_alpha': 0.26985826312830485, 'lm_beta': 1.3371065634850314} because of the following error: NotFoundError().
Traceback (most recent call last):
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 121, in _load_checkpoint
return _load_checkpoint_impl(
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 21, in _load_checkpoint_impl
ckpt = tfv1.train.load_checkpoint(checkpoint_path)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/training/checkpoint_utils.py", line 66, in load_checkpoint
return pywrap_tensorflow.NewCheckpointReader(filename)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 873, in NewCheckpointReader
return CheckpointReader(compat.as_bytes(filepattern))
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 885, in __init__
this = _pywrap_tensorflow_internal.new_CheckpointReader(filename)
tensorflow.python.framework.errors_impl.NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for /mnt/checkpoints/best_dev-221133
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/trainer/stt-train/lib/python3.8/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
value_or_values = func(trial)
File "/home/trainer/stt/training/coqui_stt_training/util/lm_optimize.py", line 39, in objective
current_samples = evaluate([test_file], create_model)
File "/home/trainer/stt/training/coqui_stt_training/evaluate.py", line 99, in evaluate
load_graph_for_evaluation(session)
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 233, in load_graph_for_evaluation
_load_or_init_impl(session, methods, allow_drop_layers=False, silent=silent)
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 166, in _load_or_init_impl
return _load_checkpoint(
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 141, in _load_checkpoint
return _load_checkpoint_impl(
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 21, in _load_checkpoint_impl
ckpt = tfv1.train.load_checkpoint(checkpoint_path)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/training/checkpoint_utils.py", line 66, in load_checkpoint
return pywrap_tensorflow.NewCheckpointReader(filename)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 873, in NewCheckpointReader
return CheckpointReader(compat.as_bytes(filepattern))
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 885, in __init__
this = _pywrap_tensorflow_internal.new_CheckpointReader(filename)
tensorflow.python.framework.errors_impl.NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for /mnt/checkpoints/best_dev-221133
[W 2023-01-22 23:18:05,217] Trial 0 failed with value None.
Traceback (most recent call last):
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 121, in _load_checkpoint
return _load_checkpoint_impl(
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 21, in _load_checkpoint_impl
ckpt = tfv1.train.load_checkpoint(checkpoint_path)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/training/checkpoint_utils.py", line 66, in load_checkpoint
return pywrap_tensorflow.NewCheckpointReader(filename)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 873, in NewCheckpointReader
return CheckpointReader(compat.as_bytes(filepattern))
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 885, in __init__
this = _pywrap_tensorflow_internal.new_CheckpointReader(filename)
tensorflow.python.framework.errors_impl.NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for /mnt/checkpoints/best_dev-221133
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/trainer/lm_optimizer.py", line 46, in <module>
results = lm_optimize.compute_lm_optimization()
File "/home/trainer/stt/training/coqui_stt_training/util/lm_optimize.py", line 59, in compute_lm_optimization
study.optimize(objective, n_jobs=1, n_trials=Config.n_trials)
File "/home/trainer/stt-train/lib/python3.8/site-packages/optuna/study/study.py", line 425, in optimize
_optimize(
File "/home/trainer/stt-train/lib/python3.8/site-packages/optuna/study/_optimize.py", line 66, in _optimize
_optimize_sequential(
File "/home/trainer/stt-train/lib/python3.8/site-packages/optuna/study/_optimize.py", line 163, in _optimize_sequential
frozen_trial = _run_trial(study, func, catch)
File "/home/trainer/stt-train/lib/python3.8/site-packages/optuna/study/_optimize.py", line 251, in _run_trial
raise func_err
File "/home/trainer/stt-train/lib/python3.8/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
value_or_values = func(trial)
File "/home/trainer/stt/training/coqui_stt_training/util/lm_optimize.py", line 39, in objective
current_samples = evaluate([test_file], create_model)
File "/home/trainer/stt/training/coqui_stt_training/evaluate.py", line 99, in evaluate
load_graph_for_evaluation(session)
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 233, in load_graph_for_evaluation
_load_or_init_impl(session, methods, allow_drop_layers=False, silent=silent)
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 166, in _load_or_init_impl
return _load_checkpoint(
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 141, in _load_checkpoint
return _load_checkpoint_impl(
File "/home/trainer/stt/training/coqui_stt_training/util/checkpoints.py", line 21, in _load_checkpoint_impl
ckpt = tfv1.train.load_checkpoint(checkpoint_path)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/training/checkpoint_utils.py", line 66, in load_checkpoint
return pywrap_tensorflow.NewCheckpointReader(filename)
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 873, in NewCheckpointReader
return CheckpointReader(compat.as_bytes(filepattern))
File "/usr/local/lib/python3.8/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 885, in __init__
this = _pywrap_tensorflow_internal.new_CheckpointReader(filename)
tensorflow.python.framework.errors_impl.NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for /mnt/checkpoints/best_dev-221133
{!} : Aborted
: Container exited with code 1.
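The failure mode: the pointer file loaded from `--checkpoint_dir /transfer-checkpoint` names `/mnt/checkpoints/best_dev-221133` as the best validating checkpoint, but no `best_dev-221133.index`/`.data-*` files exist at that prefix (this run's checkpoints live under `/mnt/checkpoints/fr`, and the pointer refers to the parent directory). A hedged diagnostic sketch for verifying a TF1-style checkpoint pointer before resuming (`check_checkpoint_pointer` is a hypothetical helper, not part of the trainer):

```shell
# Diagnostic sketch, assuming TF1-style text pointer files as used by Coqui STT:
# they contain lines like:  model_checkpoint_path: "/mnt/checkpoints/best_dev-221133"
# The NotFoundError in the log means no files exist at that prefix.
check_checkpoint_pointer() {
    pointer_file="$1"
    prefix=$(sed -n 's/^model_checkpoint_path: "\(.*\)"$/\1/p' "$pointer_file" | head -n1)
    if [ -f "${prefix}.index" ]; then
        echo "OK: $prefix"
    else
        echo "MISSING: $prefix (no ${prefix}.index on disk)" >&2
        return 1
    fi
}
```

For example, `check_checkpoint_pointer /transfer-checkpoint/best_dev_checkpoint`: if it reports MISSING, either restore the checkpoint files at the recorded prefix or point `--checkpoint_dir` at the directory that actually holds them (here, possibly `/mnt/checkpoints/fr`).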