Created
October 10, 2014 18:35
-
-
Save Holzhaus/51bf99eaacd7a420199a to your computer and use it in GitHub Desktop.
Very Basic Julius STT Engine for Jasper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the VoxForge dictionary from:
#   http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/Lexicon/VoxForge.tgz
# and place it at this location:
#   /home/jan/Downloads/VoxForge.tgz
import os | |
import re | |
import tempfile | |
import subprocess | |
import shutil | |
import tarfile | |
from contextlib import contextmanager | |
from vocabcompiler import AbstractVocabulary, get_all_phrases | |
from stt import AbstractSTTEngine | |
class JuliusSTT(AbstractSTTEngine):
    """Speech-to-text engine that shells out to the ``julius`` binary."""

    SLUG = 'julius-stt'

    def __init__(self, vocabulary=None,
                 hmmdefs="/usr/share/voxforge/julius/"
                         "acoustic_model_files/hmmdefs",
                 tiedlist="/usr/share/"
                          "voxforge/julius/acoustic_model_files/tiedlist"):
        """
        Arguments:
            vocabulary -- object exposing ``dfa_file`` and ``dict_file``
                          attributes; ``transcribe`` reads both, so it must
                          be set before transcribing despite the ``None``
                          default
            hmmdefs -- path passed to julius via ``-h``
            tiedlist -- path passed to julius via ``-hlist``
        """
        self._vocabulary = vocabulary
        self._hmmdefs = hmmdefs
        self._tiedlist = tiedlist
        # Captures the words between the two '<s>' markers on the
        # 'sentence1:' line of the julius output.
        self._pattern = re.compile(r'sentence1: <s> (.+) <s>')

    def transcribe(self, fp, mode=None):
        """
        Runs julius with ``fp`` as its standard input and extracts the
        recognized sentence from its standard output.

        Arguments:
            fp -- open file object handed to the julius process as stdin
            mode -- unused by this engine

        Returns:
            The text captured from the 'sentence1:' output line, or an
            empty string if no such line was found
        """
        cmd = ['julius',
               '-quiet',
               '-nolog',
               '-input', 'stdin',
               '-dfa', self._vocabulary.dfa_file,
               '-v', self._vocabulary.dict_file,
               '-h', self._hmmdefs,
               '-hlist', self._tiedlist,
               '-forcedict']
        cmd = [str(x) for x in cmd]
        with tempfile.SpooledTemporaryFile() as out_f:
            with tempfile.SpooledTemporaryFile() as err_f:
                subprocess.call(cmd, stdin=fp, stdout=out_f, stderr=err_f)
            out_f.seek(0)
            output = out_f.read()
        # The temp file is opened in binary mode. Where bytes and str are
        # distinct types the text pattern cannot be applied to the raw
        # output, so decode it first (undecodable bytes are replaced
        # rather than raising).
        if not isinstance(output, str):
            output = output.decode('utf-8', 'replace')
        matchobj = self._pattern.search(output)
        return matchobj.group(1) if matchobj else ""
class JuliusVocabulary(AbstractVocabulary):
    """Vocabulary that compiles phrases into julius 'dfa'/'dict' files."""

    PATH_PREFIX = 'julius-vocabulary'
    # Lexicon handed to JuliusG2P for word -> phoneme lookups. Kept as a
    # class attribute so it can be overridden without editing the method.
    LEXICON_FILE = '/home/jan/Downloads/VoxForge.tgz'

    @property
    def dfa_file(self):
        """
        Returns:
            The path of the julius dfa file as string
        """
        return os.path.join(self.path, 'dfa')

    @property
    def dict_file(self):
        """
        Returns:
            The path of the julius dict file as string
        """
        return os.path.join(self.path, 'dict')

    @property
    def is_compiled(self):
        """
        Returns:
            True if the base class reports the vocabulary as compiled and
            both the dfa and the dict file are readable
        """
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this class is subclassed.
        return (super(JuliusVocabulary, self).is_compiled and
                os.access(self.dfa_file, os.R_OK) and
                os.access(self.dict_file, os.R_OK))

    def _get_grammar(self, phrases):
        """
        Returns:
            A dict mapping each grammar symbol to its list of productions.
            The grammar is fixed (one or more WORDs between NS_B and NS_E)
            and does not depend on ``phrases``.
        """
        return {'S': [['NS_B', 'WORD_LOOP', 'NS_E']],
                'WORD_LOOP': [['WORD_LOOP', 'WORD'], ['WORD']]}

    def _get_word_defs(self, phrases):
        """
        Builds the word categories for the voca file.

        Arguments:
            phrases -- iterable of phrase strings; each is split on single
                       spaces into words

        Returns:
            A dict mapping category name to a list of (word, phoneme)
            tuples. Words without a lexicon entry contribute nothing.
        """
        word_defs = {'NS_B': [('<s>', 'sil')],
                     'NS_E': [('<s>', 'sil')],
                     'WORD': []}
        g2p = JuliusG2P(self.LEXICON_FILE)
        # A word shared by several phrases is emitted only once, in
        # first-seen order, so the voca file holds no duplicate entries.
        seen = set()
        for phrase in phrases:
            for word in phrase.split(' '):
                if word in seen:
                    continue
                seen.add(word)
                for phoneme in g2p.translate(word):
                    word_defs['WORD'].append((word, phoneme))
        return word_defs

    def _compile_vocabulary(self, phrases):
        """
        Writes a grammar and a voca file into a temporary directory, runs
        ``mkdfa.pl`` on them and moves the resulting dfa and dict files to
        ``self.dfa_file`` and ``self.dict_file``.

        Arguments:
            phrases -- iterable of phrase strings to compile
        """
        prefix = 'jasper'
        tmpdir = tempfile.mkdtemp()
        try:
            # Create grammar file
            tmp_grammar_file = os.path.join(
                tmpdir, os.extsep.join([prefix, 'grammar']))
            with open(tmp_grammar_file, 'w') as f:
                grammar = self._get_grammar(phrases)
                # The 'S' productions are written before all others.
                for definition in grammar.pop('S'):
                    f.write("%s: %s\n" % ('S', ' '.join(definition)))
                for name, definitions in grammar.items():
                    for definition in definitions:
                        f.write("%s: %s\n" % (name, ' '.join(definition)))

            # Create voca file
            tmp_voca_file = os.path.join(
                tmpdir, os.extsep.join([prefix, 'voca']))
            with open(tmp_voca_file, 'w') as f:
                for category, words in self._get_word_defs(phrases).items():
                    f.write("%% %s\n" % category)
                    for word, phoneme in words:
                        f.write("%s\t\t\t%s\n" % (word, phoneme))

            # mkdfa.pl
            # Run the tool inside tmpdir via cwd= rather than os.chdir(),
            # so the working directory of this process is never changed
            # and cannot be left pointing at tmpdir if the call fails.
            cmd = ['mkdfa.pl', str(prefix)]
            with tempfile.SpooledTemporaryFile() as out_f:
                with tempfile.SpooledTemporaryFile() as err_f:
                    subprocess.call(cmd, cwd=tmpdir,
                                    stdout=out_f, stderr=err_f)
                    err_f.seek(0)
                    self._logger.debug(err_f.read().strip())
                out_f.seek(0)
                self._logger.debug(out_f.read().strip())

            tmp_dfa_file = os.path.join(
                tmpdir, os.extsep.join([prefix, 'dfa']))
            tmp_dict_file = os.path.join(
                tmpdir, os.extsep.join([prefix, 'dict']))
            shutil.move(tmp_dfa_file, self.dfa_file)
            shutil.move(tmp_dict_file, self.dict_file)
        finally:
            # Remove the scratch directory on failure as well; cleanup
            # errors are ignored so they cannot mask the real exception.
            shutil.rmtree(tmpdir, ignore_errors=True)
class JuliusG2P(object):
    """Looks up phoneme strings for words in a VoxForge-style lexicon."""

    def __init__(self, lexicon_file):
        """
        Arguments:
            lexicon_file -- path to either a tar archive containing the
                            member 'VoxForge/VoxForgeDict' or a plain
                            text lexicon file
        """
        self._lexicon_file = lexicon_file
        self._lexicon_data = self._parse_lexicon(self._lexicon_file)

    @contextmanager
    def _open_lexicon(self, fname):
        """
        Context manager yielding an iterable of lexicon lines.

        Arguments:
            fname -- path of the tar archive or plain lexicon file
        """
        if tarfile.is_tarfile(fname):
            with tarfile.open(fname) as tf:
                lex = tf.getmember('VoxForge/VoxForgeDict')
                f = tf.extractfile(lex)
                try:
                    yield f
                finally:
                    # Close the member even if the consumer raised.
                    f.close()
        else:
            # Open the file that was asked for, not self._lexicon_file.
            with open(fname, 'r') as f:
                yield f

    def _parse_lexicon(self, fname):
        """
        Parses lines of the form ``... [WORD] phonemes``.

        Arguments:
            fname -- path of the tar archive or plain lexicon file

        Returns:
            A dict mapping each bracketed word to the list of its phoneme
            strings, in file order. '+' and '-' inside the phoneme string
            are replaced by spaces.
        """
        data = {}
        with self._open_lexicon(fname) as f:
            for line in f:
                # Tar members are read in binary mode; decode when that
                # yields a type other than str.
                if not isinstance(line, str):
                    line = line.decode('utf-8')
                _, opened, rest = line.partition('[')
                word, closed, phoneme = rest.partition(']')
                # Skip lines without a '[' followed later by a ']'.
                if not opened or not closed:
                    continue
                word = word.strip()
                phoneme = phoneme.strip()
                phoneme = phoneme.replace('+', ' ').replace('-', ' ')
                data.setdefault(word, []).append(phoneme)
        return data

    def translate(self, word):
        """
        Arguments:
            word -- the word to look up

        Returns:
            The list of phoneme strings for ``word``. If the word itself
            is not in the lexicon, the spelling with hyphens removed and
            then the spelling with hyphens replaced by spaces are tried.
            An empty list is returned when none of them is known.
        """
        # Membership test and lookup use the same key for each candidate,
        # so a successful test can never be followed by a KeyError.
        candidates = (word,
                      word.replace('-', ''),
                      word.replace('-', ' '))
        for candidate in candidates:
            if candidate in self._lexicon_data:
                return self._lexicon_data[candidate]
        return []
if __name__ == '__main__':
    # Manual smoke test: compile a vocabulary for all known phrases into a
    # fresh temporary directory and transcribe the bundled 'time.wav'.
    import jasperpath

    phrases = get_all_phrases()
    vocab = JuliusVocabulary(path=tempfile.mkdtemp())

    # State before compilation
    print("Vocabulary in:     %s" % vocab.path)
    print("Revision file:     %s" % vocab.revision_file)
    print("Compiled revision: %s" % vocab.compiled_revision)
    print("Is compiled:       %r" % vocab.is_compiled)
    print("Matches phrases:   %r" % vocab.matches_phrases(phrases))

    if not vocab.is_compiled or not vocab.matches_phrases(phrases):
        print("Compiling...")
        vocab.compile(phrases)
        print("")
        # State after compilation
        print("Vocabulary in:     %s" % vocab.path)
        print("Revision file:     %s" % vocab.revision_file)
        print("Compiled revision: %s" % vocab.compiled_revision)
        print("Is compiled:       %r" % vocab.is_compiled)
        print("Matches phrases:   %r" % vocab.matches_phrases(phrases))
    print("")

    sttinst = JuliusSTT(vocabulary=vocab)
    with open(jasperpath.data('audio', 'time.wav'), mode="rb") as f:
        # Call form of print, consistent with every other print above;
        # the bare statement form is a syntax error on Python 3.
        print(sttinst.transcribe(f))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment