Created
April 20, 2020 14:52
-
-
Save sberryman/1e87cccecb02e7493267a48548547caa to your computer and use it in GitHub Desktop.
Montreal Forced Aligner Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FROM kaldiasr/kaldi:latest
FROM python:3.6-buster

WORKDIR "/workspace"

# Install the system packages in a single layer. The apt caches and package
# lists are removed in the same RUN that created them; deleting them in a
# later layer would not shrink the image.
RUN apt-get update \
 && apt-get install -y wget \
      automake autoconf sox libtool subversion libatlas3-base gfortran \
 && apt-get -y autoremove \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# ADD requirements.txt requirements.txt | |
# RUN pip install -r requirements.txt | |
# kaldi (specific version!)
ENV KALDI_VERSION=094d22746b604fd20c2b8730966c9d0bc9f2170b

# Clone Kaldi at the pinned commit, build its tools (plus OpenBLAS), then
# configure and build the sources as shared libraries.
# The job count comes from the lowercase `nproc` utility: `$(NPROC)` is a shell
# command substitution of a command that does not exist, so it expands to
# nothing and leaves a bare `make -j`, which places no limit on parallel jobs.
RUN cd /opt && \
    git clone https://github.com/kaldi-asr/kaldi.git && \
    cd kaldi && \
    git checkout "$KALDI_VERSION" && \
    cd tools && \
    make -j "$(nproc)" && \
    make openblas && \
    cd ../src && \
    ./configure --shared --openblas-root=/opt/kaldi/tools/OpenBLAS/install && \
    make depend -j "$(nproc)" && \
    make -j 4
# OpenGrm-Ngram: download the release tarball, unpack it under /opt and build
# it against the OpenFst located in /opt/kaldi/tools/openfst. The result is
# installed into an `install` directory inside the unpacked source tree.
ENV OPENGRM_NGRAM=1.3.4
RUN cd /opt \
 && archive="opengrm-ngram-${OPENGRM_NGRAM}.tar.gz" \
 && wget "http://www.opengrm.org/twiki/pub/GRM/NGramDownload/${archive}" \
 && tar -xf "${archive}" \
 && rm "${archive}" \
 && cd "opengrm-ngram-${OPENGRM_NGRAM}" \
 && export LD_LIBRARY_PATH=/opt/kaldi/tools/openfst/lib \
 && export CPLUS_INCLUDE_PATH=/opt/kaldi/tools/openfst/src/include \
 && ./configure --prefix="$(pwd)/install" \
 && make -j 4 \
 && make install
# Phonetisaurus: clone at the pinned commit and build it (static libraries
# disabled) against the OpenFst headers and libraries under
# /opt/kaldi/tools/openfst.
ENV PHONETISAURUS_VERSION=64719ca40c17cb70d810fffadac52c97984ca539
RUN git clone https://github.com/AdolfVonKleist/Phonetisaurus.git /opt/Phonetisaurus \
 && cd /opt/Phonetisaurus \
 && git checkout "${PHONETISAURUS_VERSION}" \
 && openfst_root=/opt/kaldi/tools/openfst \
 && ./configure \
      --enable-static=no \
      --with-openfst-includes="${openfst_root}/include" \
      --with-openfst-libs="${openfst_root}/lib" \
 && make -j 4
# aligner
# NOTE(review): MONTREAL_ALIGNER_VERSION is declared but never referenced by the
# commands below, so the clone builds whatever the default branch points at.
# Confirm whether a `git checkout` of a matching tag was intended.
ENV MONTREAL_ALIGNER_VERSION=1.0.1

# Clone the aligner, run its thirdparty/*_binaries.py scripts against the three
# trees built under /opt, install its Python requirements, download the
# pretrained English model and run the freeze script.
# pip runs with --no-cache-dir so its download cache is not stored in the layer.
RUN cd /opt && \
    git clone https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner.git && \
    cd Montreal-Forced-Aligner && \
    python thirdparty/kaldi_binaries.py /opt/kaldi && \
    python thirdparty/opengrm_ngram_binaries.py "/opt/opengrm-ngram-$OPENGRM_NGRAM" && \
    python thirdparty/phonetisaurus_binaries.py /opt/Phonetisaurus && \
    pip install --no-cache-dir -r requirements.txt && \
    mkdir pretrained_models && \
    cd pretrained_models && \
    wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/raw/dc09bb3d7302bc66eb8fdef543b44aa0fab61b07/pretrained_models/english.zip && \
    cd ../ && \
    python freezing/freeze.py
# Start containers inside the dist directory of the aligner checkout.
WORKDIR "/opt/Montreal-Forced-Aligner/dist/montreal-forced-aligner"

# only requirement is TextGridTools
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir tgt

# Default to an interactive shell.
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# use a different docker image! | |
# make build_align && make run_align | |
# bin/mfa_align \ | |
# /datasets/CommonVoice/en/speakers \ | |
# /datasets/slr60/english.dict \ | |
# /opt/Montreal-Forced-Aligner/dist/montreal-forced-aligner/pretrained_models/english.zip \ | |
# /output/montreal-aligned/cv-en/ | |
# bin/mfa_validate_dataset \ | |
# /datasets/slr60/test-clean \ | |
# /datasets/slr60/english.dict\ | |
# english | |
import sys | |
import tgt | |
from pathlib import Path | |
from tqdm import tqdm | |
DATASET = 'dev-clean'
# DATASET = 'train-clean-100'
# DATASET = 'train-clean-360'

# Corpus root: transcripts are read from here and the per-book alignment
# files are written back into it.
dataset_path = Path('/datasets/slr60/{}'.format(DATASET))
# Root of the TextGrid tree, laid out as <speaker>/<book>/*.TextGrid.
base_path = Path('/output/montreal-aligned/{}'.format(DATASET))

speaker_dirs = [f for f in base_path.glob("*") if f.is_dir()]
for speaker_dir in tqdm(speaker_dirs):
    book_dirs = [f for f in speaker_dir.glob("*") if f.is_dir()]
    for book_dir in book_dirs:
        # One alignment file per (speaker, book), stored next to the corpus data.
        alignment_file = dataset_path.joinpath(
            speaker_dir.stem,
            book_dir.stem,
            "{0}_{1}.alignment.txt".format(speaker_dir.stem, book_dir.stem)
        )

        with open(alignment_file, 'w', encoding='utf-8') as out_file:
            # find our textgrid files
            textgrid_files = sorted(f for f in book_dir.glob("*.TextGrid") if f.is_file())

            # process each grid file and add to our output
            for textgrid_file in textgrid_files:
                # read the raw transcript as well
                transcript_file = dataset_path.joinpath(
                    speaker_dir.stem,
                    book_dir.stem,
                    "{0}.txt".format(textgrid_file.stem)
                )
                with open(transcript_file, 'r', encoding='utf-8') as in_file:
                    # Strip surrounding whitespace so a trailing newline in the
                    # transcript file does not split the record over two lines.
                    transcript = in_file.read().strip()

                # read the grid
                # (The leftover debug print + sys.exit(1) that used to follow
                # this call stopped the script on the first TextGrid, so no
                # record was ever written; it has been removed. The variable
                # is also renamed so it no longer shadows the builtin `input`.)
                textgrid = tgt.io.read_textgrid(textgrid_file)

                # get all the word tiers
                word_tier = textgrid.get_tier_by_name('words')
                words = ",".join(interval.text for interval in word_tier.intervals)
                end_times = ",".join(str(interval.end_time) for interval in word_tier.intervals)

                # Record layout: <utterance id> "<words>" "<end times>" <transcript>
                out_file.write("{0} \"{1}\" \"{2}\" {3}\n".format(
                    textgrid_file.stem,
                    words,
                    end_times,
                    transcript
                ))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment