Last active
August 29, 2015 14:17
-
-
Save sotelo/8927e18841cf2699d287 to your computer and use it in GitHub Desktop.
phoneme extraction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[ ]: | |
import re | |
import commands | |
import numpy as np | |
import scipy.io.wavfile | |
import itertools | |
import cPickle | |
# In[ ]: | |
#cd /u/sotelo/ez-phones | |
#115 | |
# In[ ]: | |
# Default index of the segmented data file. The processing loop further down
# rebinds this name for every file it handles.
numfile = 0
# In[ ]: | |
def sec2step(seconds, sample_rate=16000):
    """Convert a duration in seconds to a whole number of audio samples.

    Parameters
    ----------
    seconds : anything accepted by ``float()``
        Duration (or time offset) in seconds; may be negative.
    sample_rate : int, optional
        Samples per second. Defaults to 16000, the value that used to be
        hard-coded here.

    Returns
    -------
    NumPy integer scalar
        ``sample_rate * seconds`` rounded with ``np.round``.
    """
    return np.round(sample_rate * float(seconds)).astype('int')
def phoneme_list(x, duration=2.0):
    """Expand timed phoneme segments into one label per audio step.

    Given the output of pocketsphinx compute the list with the phonemes.

    Parameters
    ----------
    x : list of str
        Output lines. The first element is skipped. Every other line is split
        on single spaces and kept only when it has at least three fields and
        the second field looks like a decimal number; the first three fields
        are then read as ``label start end`` (times in seconds).
    duration : float, optional
        Clip length in seconds. When the last segment ends earlier, the rest
        is filled with ``'*SIL'``. Defaults to 2.0, the value that used to be
        hard-coded here.

    Returns
    -------
    list of str
        One label per step (steps are computed by ``sec2step``). Time before
        the first segment, between segments, and after the last segment up to
        ``duration`` is labelled ``'*SIL'``.

    Raises
    ------
    ValueError
        If no line carries timing information, or if the end-time field of a
        retained line cannot be converted to ``float``.
    """
    time_pattern = re.compile(r"^\d+?\.\d+?$")

    # Collect (label, start, end) triples. Lines that are too short to carry
    # timing fields are skipped instead of raising IndexError.
    segments = []
    for line in x[1:]:
        fields = line.split(' ')
        if len(fields) < 3 or time_pattern.match(fields[1]) is None:
            continue
        segments.append((fields[0], float(fields[1]), float(fields[2])))

    if not segments:
        raise ValueError("no timed phoneme lines found in pocketsphinx output")

    # Pad with silence so the segments span from 0.0 to `duration`.
    if segments[0][1] > 0.0:
        segments.insert(0, ('*SIL', 0.0, segments[0][1]))
    if segments[-1][2] < duration:
        segments.append(('*SIL', segments[-1][2], duration))

    labels = []
    # Starting at the first segment's own start makes its leading gap zero.
    previous_end = segments[0][1]
    for phoneme, start, end in segments:
        # A gap since the previous segment is silence. A negative count
        # (overlapping segments) repeats the list zero times.
        labels.extend(int(sec2step(start - previous_end)) * ['*SIL'])
        labels.extend(int(sec2step(end - start)) * [phoneme])
        previous_end = end
    return labels
# In[ ]: | |
# Indices of the segmented training files handled by this run.
NUMFILES = range(96, 100)

# Reference phoneme inventory used to check every output file. It does not
# change between iterations, so it is read once and the file is closed.
# NOTE(review): unpickling can execute arbitrary code; ph_list.p must be a
# trusted, locally produced file.
with open("ph_list.p", "rb") as ph_file:
    ph_list = cPickle.load(ph_file)

for numfile in NUMFILES:
    print("New File")
    x = np.load('/data/lisatmp3/Lessac_Blizzard2013_segmented/train/sf_train_segmented_%i.npy' % numfile)
    results = []
    for i, z in enumerate(x):
        # Create temporary wav file and process with pocketsphinx
        scipy.io.wavfile.write('/dev/shm/temp.wav', 16000, z.astype(np.int16))
        txt = commands.getstatusoutput('bash ps_shortcut.sh /dev/shm/temp.wav')[1]
        # Hack to get the results of pocketsphinx: keep only the lines that
        # follow the last line starting with 'INFO'.
        txt = re.split('\n', txt)
        last = next(j for j, line in enumerate(txt[::-1]) if re.match('INFO', line) is not None)
        # Parse results and append to results
        try:
            # len(txt) - last rather than -last: when the final line is the
            # INFO line (last == 0), txt[-0:] would be the whole output.
            results.append(phoneme_list(txt[len(txt) - last:]))
        except Exception:
            # Clips that cannot be parsed are labelled as silence throughout.
            results.append(['*SIL'] * 32000)
            print("Error")
        # Progress: index of the clip that was just processed.
        print(i)
    phonemes = sorted(set(itertools.chain(*results)))
    # Label indices are only comparable across files when every file yields
    # the same inventory. This is an explicit check because `assert` is
    # stripped when Python runs with -O.
    if ph_list != phonemes:
        raise ValueError("phonemes found in file %i do not match ph_list.p" % numfile)
    phonemes = {key: i for i, key in enumerate(phonemes)}
    results_array = np.array([np.array([phonemes[step] for step in phon_list]) for phon_list in results])
    np.save('/data/lisatmp3/Blizzard2013_phonemes_pocketsphinx/train/sf_train_segmented_ph_%i.npy' % numfile, results_array)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment