Skip to content

Instantly share code, notes, and snippets.

@puterleat
Created January 22, 2015 11:08
Show Gist options
  • Select an option

  • Save puterleat/ad5eae94f8814394f0d0 to your computer and use it in GitHub Desktop.

Select an option

Save puterleat/ad5eae94f8814394f0d0 to your computer and use it in GitHub Desktop.
auto chunk speech files for prosody project
#!/usr/local/bin/python
"""Script to select audio chunks from speech files."""
import operator
import os
import pydub
import itertools
# Destination directory for the sliced audio files; change it to suit.
# Run the script from the directory containing the raw audio files
# (or from a copy of that directory, to be safe).
NEWFILESPATH = "/Users/ben/Documents/ReframedAudioAnalysis/sliced/"

# Regular files in the current working directory whose names do not start
# with a dot.
onlyfiles = [
    name
    for name in os.listdir(".")
    if os.path.isfile(name) and not name.startswith(".")
]

# Pair every filename with its (stem, extension) split; the extension is
# stored with its dot removed.
filesandextensions = [
    (name, (stem, suffix.replace(".", "")))
    for name, (stem, suffix) in zip(onlyfiles, map(os.path.splitext, onlyfiles))
]
def tupslice(l, tup):
    """Return the slice of ``l`` bounded by the first two items of ``tup``.

    ``tup[0]`` is used as the start index and ``tup[1]`` as the (exclusive)
    stop index; any further items in ``tup`` are ignored.
    """
    bounds = slice(tup[0], tup[1])
    return l[bounds]
def takechunks(audio, chunks, gap=10):
    """Concatenate the selected slices of ``audio``, each followed by silence.

    Args:
        audio: Sliceable audio object whose slices can be joined with ``+``
            (the script passes a ``pydub.AudioSegment``).
        chunks: Iterable of ``(start, stop)`` tuples handed to ``tupslice``.
        gap: Length in seconds of the silence inserted after every chunk,
            including the last one.

    Returns:
        The joined audio. When ``chunks`` is empty this is a zero-length
        slice of ``audio`` rather than an error.
    """
    # silent() takes a duration in milliseconds. A separate name is used so
    # the ``gap`` parameter is not rebound to a different type.
    silence = pydub.AudioSegment.silent(1000 * gap)

    # Left fold starting from an empty slice of the input: avoids the
    # Python-2-only ``itertools.izip``/builtin ``reduce`` and gives a
    # well-defined result for an empty chunk list.
    joined = tupslice(audio, (0, 0))
    for bounds in chunks:
        joined = joined + tupslice(audio, bounds) + silence
    return joined
def m(m):
    """Convert ``m`` minutes to the equivalent number of milliseconds."""
    milliseconds_per_second = 1000
    seconds_per_minute = 60
    return m * milliseconds_per_second * seconds_per_minute
def choose_chunks(audio, initial_chunk=(0, 5), nchunks=5, chunklength=2):
    """Pick an initial chunk plus evenly spaced later chunks of ``audio``.

    Args:
        audio: Object whose ``len()`` is treated as its duration in
            milliseconds (as for a ``pydub.AudioSegment``).
        initial_chunk: ``(start, stop)`` of the first chunk, in minutes.
            Defaults to the first five minutes.
        nchunks: Number of sections the usable duration is divided into;
            ``nchunks - 1`` further chunks are selected after the initial
            one. The default matches the script's only call site.
        chunklength: Length in minutes of each further chunk. The default
            matches the script's only call site.

    Returns:
        Tuple of ``(start_ms, stop_ms)`` pairs in milliseconds, with the
        initial chunk first.
    """
    ms_per_minute = 60 * 1000

    in1, out1 = initial_chunk
    start = ((int(in1 * ms_per_minute), int(out1 * ms_per_minute)),)

    chunk_ms = int(chunklength * ms_per_minute)
    # Reserve one chunk length at the end so the last chunk fits inside the
    # recording; clamp at zero for recordings shorter than one chunk so the
    # offsets never go negative.
    usable_ms = max(len(audio) - chunk_ms, 0)

    # Work in integer milliseconds: the original expression both lacked the
    # parentheses around (total - chunklength) and truncated to whole
    # minutes, which collapsed every offset onto the end of the file.
    offsets = (usable_ms * i // nchunks for i in range(1, nchunks))
    partchunks = tuple((offset, offset + chunk_ms) for offset in offsets)
    return start + partchunks
# Slice every audio file found in the working directory and write the result
# as a WAV file under NEWFILESPATH.
for orig, tup in filesandextensions:
    print(orig)
    stem, extension = tup
    # Output name: spaces become hyphens, lower-cased, commas removed.
    outname = stem.replace(" ", "-").lower().replace(",", "")
    try:
        raw = pydub.AudioSegment.from_file(orig, extension)
        chunks = choose_chunks(
            raw, initial_chunk=(2, 5), nchunks=5, chunklength=2
        )
        sliced = takechunks(raw, chunks, 5)
        print(chunks)
        target = os.path.join(NEWFILESPATH, outname + os.extsep + "wav")
        sliced.export(target, format="wav")
    except Exception as e:
        # Best effort: report the file that failed and carry on with the rest.
        print("%s %s" % (orig, e))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment