Created
February 18, 2012 02:08
-
-
Save kylebgorman/1856905 to your computer and use it in GitHub Desktop.
TIMIT+: Make TIMIT bearable (see top for instructions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# TIMIT+.py: make TIMIT bearable to use
# Kyle Gorman <[email protected]>
#
# To use this:
# 1. place in the same directory as a copy of TIMIT
# 2. install SoX and textgrid.py
# 3. run ./TIMIT+.py
#
# A new copy of TIMIT, resampled with RIFF headers and textgrids and sorted
# properly, will be placed in your working directory.
import subprocess
from errno import EEXIST
from glob import iglob
from os import makedirs, path, popen, remove, symlink
from os.path import isdir
from shutil import copytree

# http://github.com/kylebgorman/textgrid.py
from textgrid import IntervalTier, TextGrid
## imperfections of this:
# don't know what the source of DX or Q is
# don't have a proper treatment for syllabic sonorants

## global
# divisor applied to the integer start/stop fields of the .WRD and .PHN lines
# (presumably the sampling rate in Hz, giving times in seconds)
SR = 16000.
# directory the source corpus is read from
OLDTIMIT = 'TIMIT'
# directory the converted corpus is written to
NEWTIMIT = 'TIMIT+'

# phone labels of the reduced set; nothing in the visible part of this script
# reads it
phone_list = set(['AO', 'AA', 'AE', 'AH', 'AW', 'AY', 'B', 'CH', 'D', 'DH',
                  'EH', 'ER', 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K',
                  'L', 'M', 'N', 'NG', 'OW', 'OY', 'P', 'R', 'S', 'SH', 'T',
                  'TH', 'UH', 'UW', 'V', 'W', 'Y', 'Z', 'ZH', 'DX', 'Q'])

# upper-cased source label -> replacement label, consulted by transform_phone()
_fix = {'H#': 'sil', 'PAU': 'sp', 'EPI': 'sp', 'PCL': 'sp', 'BCL': 'sp',
        'TCL': 'sp', 'DCL': 'sp', 'KCL': 'sp', 'GCL': 'sp', 'EL': 'L',
        'EM': 'M', 'EN': 'N', 'ENG': 'NG', 'NX': 'N', 'HV': 'HH',
        'AXR': 'ER', 'UX': 'UW', 'IX': 'AH', 'AX': 'AH', 'AX-H': 'AH'}
## helpers
def transform_phone(phone):
    """
    Map a phone label onto its replacement from the module-level `_fix`
    table.

    Labels that are keys of `_fix` are replaced by the associated value; any
    other label is returned unchanged.
    """
    # single lookup instead of a membership test followed by indexing
    return _fix.get(phone, phone)
def mkdir_p(path):
    """
    Simulate the functionality of shell's 'mkdir -p' ("create a directory if
    it doesn't already exist").

    Raises OSError when the directory cannot be created, including when
    `path` already exists but is not a directory.
    """
    try:
        makedirs(path)
    except OSError as exc:
        # an existing directory is the success case; an existing file (or any
        # other error) must not be hidden from the caller
        if exc.errno != EEXIST or not isdir(path):
            raise
def ln_sf(src, dst):
    """
    Simulate the functionality of shell's 'ln -sf' ("Overwrite a symlink")

    Creates a symlink at `dst` pointing to `src`; if `dst` already exists it
    is removed and the link is created again. Raises OSError for any other
    failure.
    """
    try:
        symlink(src, dst)
    except OSError as exc:
        # only "already exists" is recoverable; everything else propagates
        if exc.errno != EEXIST:
            raise
        remove(dst)
        symlink(src, dst)
if __name__ == '__main__':
    # Build NEWTIMIT from OLDTIMIT: for every .WAV found, write a converted
    # .wav, a .TextGrid (phone + word tiers) and a .lab (word string), then
    # symlink the .wav/.lab into the by-dialect, by-speaker and flat views.

    # make some directories, etc.
    if not path.exists(NEWTIMIT + '/DOC'):
        # copytree() refuses to write into an existing directory, so skip it
        # when the script is run a second time
        copytree(OLDTIMIT + '/DOC', NEWTIMIT + '/DOC')  # creating the dir NEWTIMIT
    mkdir_p(NEWTIMIT + '/TGD/')
    mkdir_p(NEWTIMIT + '/TEST-D')
    mkdir_p(NEWTIMIT + '/TRAIN-D')
    mkdir_p(NEWTIMIT + '/TEST-S')
    mkdir_p(NEWTIMIT + '/TRAIN-S')
    mkdir_p(NEWTIMIT + '/TEST-D-S')
    mkdir_p(NEWTIMIT + '/TRAIN-D-S')

    for fid in iglob(OLDTIMIT + '/*/DR[1-9]/*/*.WAV'):
        # split up path
        (head, tail) = path.split(fid)
        (root, ext) = path.splitext(tail)
        # relies on OLDTIMIT being a single path component, so that head has
        # exactly four '/'-separated parts
        (junk, category, dialect, speaker) = head.split('/')
        dash_d = NEWTIMIT + '/' + category + '-D/' + dialect
        dash_s = NEWTIMIT + '/' + category + '-S/' + speaker
        dash_d_dash_s = NEWTIMIT + '/' + category + '-D-S'
        lead = category + '_' + dialect + '_' + speaker + '_' + root
        fullpath = NEWTIMIT + '/' + category + '/' + dialect + '/' + speaker

        # make directory, just in case (mkdir_p tolerates existing ones)
        mkdir_p(fullpath)
        mkdir_p(dash_d)
        mkdir_p(dash_s)
        mkdir_p(dash_d_dash_s)

        # resample into 16-bit-sample RIFF headered WAV file; wait for sox to
        # finish (and fail loudly if it does not) before linking the result,
        # and pass an argument list so paths are never parsed by a shell
        wavfid = fullpath + '/' + lead + '.wav'
        subprocess.check_call(['sox', fid, '-b', '16', wavfid])

        # keep a list of phones to split
        split_list = []

        # open 'WRD' file
        wrd = IntervalTier('word')
        with open(head + '/' + root + '.WRD', 'r') as source:
            for line in source:
                (start, stop, word) = line.rstrip().split(None, 2)
                start = int(start) / SR
                stop = int(stop) / SR
                # catch and fix overlap
                try:
                    wrd.add(start, stop, word.upper())
                except ValueError as exc:
                    # assumes textgrid raises ValueError carrying the two
                    # clashing intervals as its args -- TODO confirm against
                    # the installed textgrid.py
                    (a, b) = exc.args
                    # move both boundaries to the midpoint of the overlap
                    olap = (a.maxTime - b.minTime) / 2.
                    a.maxTime -= olap
                    b.minTime += olap
                    wrd.addInterval(b)
                    split_list.append(a.maxTime)

        # open 'PHN' file
        phn = IntervalTier('phone')
        with open(head + '/' + root + '.PHN', 'r') as source:
            for line in source:
                (start, stop, phone) = line.rstrip().split(None, 2)
                start = int(start) / SR
                stop = int(stop) / SR
                phone = transform_phone(phone.upper())
                # check for phone needing to be split, or just write out
                if split_list and start < split_list[0] < stop:
                    # a repaired word boundary falls inside this phone: cut
                    # the phone in two at that boundary
                    phn.add(start, split_list[0], phone)
                    phn.add(split_list[0], stop, phone)
                    split_list.pop(0)
                else:
                    phn.add(start, stop, phone)

        # write '.TextGrid' file
        tg = TextGrid()
        tg.extend((phn, wrd))
        tg.write(NEWTIMIT + '/TGD/' + lead + '.TextGrid')

        # write '.lab' file
        wordstring = ' '.join([w.mark for w in wrd])
        labfid = fullpath + '/' + lead + '.lab'
        with open(labfid, 'w') as sink:
            sink.write(wordstring)

        # make symlinks using absolute paths
        ln_sf(path.abspath(labfid), path.abspath(dash_d) + '/' + lead + '.lab')
        ln_sf(path.abspath(wavfid), path.abspath(dash_d) + '/' + lead + '.wav')
        ln_sf(path.abspath(labfid), path.abspath(dash_s) + '/' + lead + '.lab')
        ln_sf(path.abspath(wavfid), path.abspath(dash_s) + '/' + lead + '.wav')
        ln_sf(path.abspath(labfid), path.abspath(dash_d_dash_s) + '/' +
              lead + '.lab')
        ln_sf(path.abspath(wavfid), path.abspath(dash_d_dash_s) + '/' +
              lead + '.wav')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment