- GPU: Tesla V100
- Ubuntu 18.04
# Base build tools (CUDA 10.0 pairs with gcc-7 on Ubuntu 18.04) plus
# compression libraries needed later by the flashlight build.
apt update
apt install cmake gcc-7 liblzma-dev libbz2-dev
# Register NVIDIA's CUDA 10.0 repository for Ubuntu 18.04.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
dpkg -i cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
# Import the signing keys for the CUDA and machine-learning repositories.
# NOTE(review): apt-key is deprecated on newer Ubuntu releases; it still works on 18.04.
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
apt-get update
# The machine-learning repo provides the cuDNN and NCCL packages installed below.
wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
dpkg -i ./nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
apt install libnccl2 libnccl-dev
apt-get update
# Pin cuDNN 7.4.1.5 builds that match CUDA 10.0 exactly.
apt-get install --no-install-recommends \
cuda-10-0 \
libcudnn7=7.4.1.5-1+cuda10.0 \
libcudnn7-dev=7.4.1.5-1+cuda10.0
# Make the CUDA toolchain visible in future login shells.
echo 'export PATH=/usr/local/cuda-10.0/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
# Reboot so the NVIDIA driver loads, then verify the GPU is visible.
reboot
nvidia-smi
git clone https://github.com/flashlight/flashlight.git
source flashlight/scripts/colab/colab_install_deps.sh
cd ~
export MKLROOT=/opt/intel/mkl
export ArrayFire_DIR=/opt/arrayfire/share/ArrayFire/cmake
export DNNL_DIR=/opt/dnnl/dnnl_lnx_2.0.0_cpu_iomp/lib/cmake/dnnl
cd flashlight && git checkout d2e1924cb2a2b32b48cc326bb7e332ca3ea54f67 && mkdir -p build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Release \
-DFL_BUILD_TESTS=OFF \
-DFL_BUILD_EXAMPLES=OFF \
-DFL_BUILD_APP_ASR=ON && \
make -j$(nproc)
# check binaries
ls ~/flashlight/build/bin/asr
# Fetch the tutorial acoustic models, token/lexicon files and language model.
base_url="https://dl.fbaipublicfiles.com/wav2letter/rasr/tutorial"
cd ~ && mkdir test-recognizer && cd test-recognizer
for f in \
    am_transformer_ctc_stride3_letters_300Mparams.bin \
    am_conformer_ctc_stride3_letters_25Mparams.bin \
    tokens.txt \
    lexicon.txt \
    lm_common_crawl_small_4gram_prun0-6-15_200kvocab.bin; do
  wget "${base_url}/${f}"
done
# Sample audio clips for the inference run below.
mkdir audio && cd audio
for clip in 116-288045-0000 116-288045-0001 116-288045-0002 116-288045-0003; do
  wget "${base_url}/audio/${clip}.flac"
done
infer.py:
import os
import signal
from subprocess import Popen, PIPE
def read_current_output(process):
    """Echo the child's stderr line-by-line until it prints its input prompt.

    Stops when the line containing "Waiting the input in the format" appears,
    or when the child closes its stderr (process exited).
    """
    while True:
        raw = process.stderr.readline()
        # An empty read means EOF (the process died). Without this guard the
        # original loop would spin forever printing empty strings.
        if not raw:
            break
        line = raw.decode()
        print(line.strip())
        if "Waiting the input in the format" in line:
            break
def create_process(cmd):
    """Launch `cmd` through the shell in its own process group and block
    until it prints its input prompt (so it is ready to accept audio paths).

    A dedicated session (os.setsid) lets the caller later terminate the
    whole group with os.killpg.
    """
    proc = Popen(
        [cmd],
        stdin=PIPE,
        stdout=PIPE,
        stderr=PIPE,
        shell=True,
        preexec_fn=os.setsid,
    )
    read_current_output(proc)
    return proc
def run_inference(audio_path, process):
    """Send one audio path to the running decoder and echo its response."""
    request = "{}\n".format(audio_path)
    process.stdin.write(request.encode())
    process.stdin.flush()
    # Print everything the decoder says until it prompts for the next file.
    read_current_output(process)
# Command line for the flashlight CTC inference binary: it loads the acoustic
# model, lexicon and 4-gram LM once, then reads audio paths from stdin.
# Paths assume the files were downloaded to /root/test-recognizer as above.
inference_cmd = """/root/flashlight/build/bin/asr/fl_asr_tutorial_inference_ctc \
--am_path=/root/test-recognizer/am_transformer_ctc_stride3_letters_300Mparams.bin \
--tokens_path=/root/test-recognizer/tokens.txt \
--lexicon_path=/root/test-recognizer/lexicon.txt \
--lm_path=/root/test-recognizer/lm_common_crawl_small_4gram_prun0-6-15_200kvocab.bin \
--logtostderr=true \
--sample_rate=16000 \
--beam_size=50 \
--beam_size_token=30 \
--beam_threshold=100 \
--lm_weight=1.5 \
--word_score=0"""
# Start the decoder and wait until it asks for an audio path.
inference_process = create_process(inference_cmd)
# do inference
# Relative paths: the script is expected to run from the audio/ directory.
run_inference("116-288045-0000.flac", inference_process)
run_inference("116-288045-0001.flac", inference_process)
run_inference("116-288045-0002.flac", inference_process)
run_inference("116-288045-0003.flac", inference_process)
# kill the process
# SIGTERM goes to the whole process group created via os.setsid in create_process.
os.killpg(os.getpgid(inference_process.pid), signal.SIGTERM)
run:
python infer.py
output:
root@test3:~/test-recognizer/audio# python infer.py
I0526 00:42:22.619159 83168 CachingMemoryManager.cpp:114 CachingMemoryManager recyclingSizeLimit_=18446744073709551615 (16777216.00 TiB) splitSizeLimit_=18446744073709551615 (16777216.00 TiB)
I0526 00:42:22.619926 14145 InferenceCTC.cpp:66] Gflags after parsing
--flagfile=;--fromenv=;--tryfromenv=;--undefok=;--tab_completion_columns=80;--tab_completion_word=;--help=false;--helpfull=false;--helpmatch=;--helpon=;--helppackage=false;--helpshort=false;--helpxml=false;--version=false;--am_path=/root/test-recognizer/am_transformer_ctc_stride3_letters_300Mparams.bin;--audio_list=;--beam_size=50;--beam_size_token=30;--beam_threshold=100;--lexicon_path=/root/test-recognizer/lexicon.txt;--lm_path=/root/test-recognizer/lm_common_crawl_small_4gram_prun0-6-15_200kvocab.bin;--lm_weight=1.5;--sample_rate=16000;--tokens_path=/root/test-recognizer/tokens.txt;--word_score=0;--alsologtoemail=;--alsologtostderr=false;--colorlogtostderr=false;--drop_log_memory=true;--log_backtrace_at=;--log_dir=;--log_link=;--log_prefix=true;--logbuflevel=0;--logbufsecs=30;--logemaillevel=999;--logfile_mode=436;--logmailer=/bin/mail;--logtostderr=true;--max_log_size=1800;--minloglevel=0;--stderrthreshold=2;--stop_logging_if_full_disk=false;--symbolize_stacktrace=true;--v=0;--vmodule=;
I0526 00:42:22.620344 14145 InferenceCTC.cpp:89] [Inference tutorial for CTC] Reading acoustic model from /root/test-recognizer/am_transformer_ctc_stride3_letters_300Mparams.bin
I0526 00:42:23.081338 14145 InferenceCTC.cpp:140] [Inference tutorial for CTC] Network is loaded.
I0526 00:42:23.362196 14145 InferenceCTC.cpp:152] [Inference tutorial for CTC] Number of classes/tokens in the network: 29
I0526 00:42:23.362344 14145 InferenceCTC.cpp:155] [Inference tutorial for CTC] Number of words in the lexicon: 200001
I0526 00:42:23.521153 14145 InferenceCTC.cpp:166] [Inference tutorial for CTC] Language model is constructed.
I0526 00:42:24.493661 14145 InferenceCTC.cpp:177] [Inference tutorial for CTC] Trie is planted.
I0526 00:42:24.495051 14145 InferenceCTC.cpp:196] [Inference tutorial for CTC] Beam search decoder is created
I0526 00:42:24.501401 14145 InferenceCTC.cpp:246] [Inference tutorial for CTC]: Waiting the input in the format [audio_path].
I0526 00:42:28.565971 14145 InferenceCTC.cpp:297] [Inference tutorial for CTC]: predicted output for 116-288045-0000.flac
as i approached the city i heard bells ringing and a little later i found the streets astir with throngs of well dressed people in family groups winding their way hither and thither
I0526 00:42:28.566220 14145 InferenceCTC.cpp:246] [Inference tutorial for CTC]: Waiting the input in the format [audio_path].
I0526 00:42:28.859449 14145 InferenceCTC.cpp:297] [Inference tutorial for CTC]: predicted output for 116-288045-0001.flac
looking about me i saw a gentleman in a neat black dress smiling and his hand extended to me with great cordiality
I0526 00:42:28.859648 14145 InferenceCTC.cpp:246] [Inference tutorial for CTC]: Waiting the input in the format [audio_path].
I0526 00:42:28.984359 14145 InferenceCTC.cpp:297] [Inference tutorial for CTC]: predicted output for 116-288045-0002.flac
he must have realized i was a stranger and wished to tender his hospitality to me i accepted it gratefully i clasped his hand he pressed mine
I0526 00:42:28.984593 14145 InferenceCTC.cpp:246] [Inference tutorial for CTC]: Waiting the input in the format [audio_path].
I0526 00:42:29.042457 14145 InferenceCTC.cpp:297] [Inference tutorial for CTC]: predicted output for 116-288045-0003.flac
we gazed for a moment slightly into each other's eyes
I0526 00:42:29.042665 14145 InferenceCTC.cpp:246] [Inference tutorial for CTC]: Waiting the input in the format [audio_path].
# If the flashlight build crashes because of low memory on the machine, add a
# 5 GB swap file (not persistent across reboots; add to /etc/fstab to keep it).
fallocate -l 5G /swapfile && chmod 600 /swapfile && mkswap /swapfile && swapon /swapfile
Links:
- https://t.me/speech_recognition_uk (Ukrainian Speech Recognition Community)
- https://coreweave.com/ (GPU cloud)
- https://github.com/flashlight/flashlight/tree/main/flashlight/app/asr
- https://colab.research.google.com/github/flashlight/flashlight/blob/master/flashlight/app/asr/tutorial/notebooks/InferenceAndAlignmentCTC.ipynb
- https://github.com/flashlight/flashlight