puterleat · January 22, 2015 11:08
diff --git a/gistfile1.txt b/gistfile1.txt

 #!/usr/local/bin/python

 """Script to select audio chunks from speech files."""

 import operator
 import os
 import pydub
 import itertools


 # Set this to the directory where the new sliced audio files should be saved,
 # and run the script from the directory containing the raw audio files
 # (or a copy of them, to be safe).

 # Directory that receives the exported, sliced wav files.
 NEWFILESPATH = "/Users/ben/Documents/ReframedAudioAnalysis/sliced/"


 # Regular, non-hidden files found in the current working directory.
 onlyfiles = [
     f for f in os.listdir(".")
     if os.path.isfile(f) and not f.startswith(".")
 ]
 # Pair every file name with (name without extension, extension without dot).
 # os.path.splitext returns the extension with its leading dot (or ""),
 # so dropping the first character removes the only dot it can contain.
 filesandextensions = zip(
     onlyfiles,
     [(root, ext[1:]) for root, ext in (os.path.splitext(f) for f in onlyfiles)]
 )


 def tupslice(l, tup):
    """Slice ``l`` using the first two items of ``tup`` as (start, stop)."""
    start = tup[0]
    stop = tup[1]
    return l[start:stop]


 def takechunks(audio, chunks, gap=10):
    """Join the selected chunks of ``audio`` into a single segment.

    audio  -- object that is sliced through tupslice (``audio[start:stop]``)
    chunks -- iterable of (start, stop) pairs handed to tupslice
    gap    -- length of the silence placed after each chunk, in seconds
              (multiplied by 1000 before being passed to pydub)

    Every chunk, including the last one, is followed by the silence.
    reduce() raises TypeError when ``chunks`` is empty.
    """
    silence = pydub.AudioSegment.silent(1000 * gap)
    pieces = []
    for bounds in chunks:
        pieces.append(tupslice(audio, bounds))
        pieces.append(silence)
    return reduce(operator.add, pieces)


 def m(m):
    """Convert a number of minutes into the same duration in milliseconds."""
    return m * 1000 * 60

 def choose_chunks(audio, initial_chunk=(0, 5), nchunks=5, chunklength=2):
    """Choose the (start, stop) millisecond ranges to keep from ``audio``.

    audio         -- object whose len() is treated as a duration in
                     milliseconds
    initial_chunk -- (start, stop) of the first chunk, in minutes
    nchunks       -- number of evenly spaced slots the file is divided into;
                     one further chunk starts at each of the nchunks - 1
                     interior boundaries
                     NOTE(review): this yields nchunks chunks in total when
                     the initial one is counted -- confirm that is intended
    chunklength   -- length of each further chunk, in minutes

    Returns a tuple of (start_ms, stop_ms) pairs, initial chunk first.
    nchunks and chunklength have defaults because a parameter without a
    default may not follow one that has a default.
    """
    ms_per_min = 1000 * 60

    in1, out1 = initial_chunk # default is first 5 minutes
    start = ((in1 * ms_per_min, out1 * ms_per_min), )

    # whole minutes of audio available
    totalmins = len(audio) // ms_per_min
    # subtract chunklength from totalmins so we have enough for the last chunk
    usable = totalmins - chunklength
    if usable <= 0:
        # the audio is too short to fit even one further chunk
        return start

    # multiply before dividing so that integer truncation does not compound
    parts = (usable * i // nchunks for i in range(1, nchunks))
    partchunks = tuple(
        (i * ms_per_min, (i + chunklength) * ms_per_min) for i in parts
    )
    return start + partchunks


 # Slice every candidate file and export the result as wav; any failure
 # (for example a file that is not audio) is reported and the loop moves on.
 for orig, (stem, ext) in filesandextensions:
    print(orig)
    # output name: spaces become dashes, lower-cased, commas removed
    slug = stem.replace(" ", "-").lower().replace(",", "")
    target = os.path.join(NEWFILESPATH, slug + os.extsep + "wav")

    try:
        raw = pydub.AudioSegment.from_file(orig, ext)
        chunks = choose_chunks(raw, initial_chunk=(2,5), nchunks=5, chunklength=2)
        sliced = takechunks(raw, chunks, 5)
        print(chunks)
        sliced.export(target, format="wav")
    except Exception as e:
        print("%s %s" % (orig, e))

	#!/usr/local/bin/python

	"""Script to select audio chunks from speech files."""

	import operator
	import os
	import pydub
	import itertools


	# Set this to the directory where the new sliced audio files should be saved,
	# and run the script from the directory containing the raw audio files
	# (or a copy of them, to be safe).

	# Directory that receives the exported, sliced wav files.
	NEWFILESPATH = "/Users/ben/Documents/ReframedAudioAnalysis/sliced/"


	# Regular, non-hidden files found in the current working directory.
	onlyfiles = [
	    f for f in os.listdir(".")
	    if os.path.isfile(f) and not f.startswith(".")
	]
	# Pair every file name with (name without extension, extension without dot).
	# os.path.splitext returns the extension with its leading dot (or ""),
	# so dropping the first character removes the only dot it can contain.
	filesandextensions = zip(
	    onlyfiles,
	    [(root, ext[1:]) for root, ext in (os.path.splitext(f) for f in onlyfiles)]
	)


	def tupslice(l, tup):
	    """Slice ``l`` using the first two items of ``tup`` as (start, stop)."""
	    start = tup[0]
	    stop = tup[1]
	    return l[start:stop]


	def takechunks(audio, chunks, gap=10):
	    """Join the selected chunks of ``audio`` into a single segment.

	    audio  -- object that is sliced through tupslice (``audio[start:stop]``)
	    chunks -- iterable of (start, stop) pairs handed to tupslice
	    gap    -- length of the silence placed after each chunk, in seconds
	              (multiplied by 1000 before being passed to pydub)

	    Every chunk, including the last one, is followed by the silence.
	    reduce() raises TypeError when ``chunks`` is empty.
	    """
	    silence = pydub.AudioSegment.silent(1000 * gap)
	    pieces = []
	    for bounds in chunks:
	        pieces.append(tupslice(audio, bounds))
	        pieces.append(silence)
	    return reduce(operator.add, pieces)


	def m(m):
	    """Convert a number of minutes into the same duration in milliseconds."""
	    return m * 1000 * 60

	def choose_chunks(audio, initial_chunk=(0, 5), nchunks=5, chunklength=2):
	    """Choose the (start, stop) millisecond ranges to keep from ``audio``.

	    audio         -- object whose len() is treated as a duration in
	                     milliseconds
	    initial_chunk -- (start, stop) of the first chunk, in minutes
	    nchunks       -- number of evenly spaced slots the file is divided into;
	                     one further chunk starts at each of the nchunks - 1
	                     interior boundaries
	                     NOTE(review): this yields nchunks chunks in total when
	                     the initial one is counted -- confirm that is intended
	    chunklength   -- length of each further chunk, in minutes

	    Returns a tuple of (start_ms, stop_ms) pairs, initial chunk first.
	    nchunks and chunklength have defaults because a parameter without a
	    default may not follow one that has a default.
	    """
	    ms_per_min = 1000 * 60

	    in1, out1 = initial_chunk # default is first 5 minutes
	    start = ((in1 * ms_per_min, out1 * ms_per_min), )

	    # whole minutes of audio available
	    totalmins = len(audio) // ms_per_min
	    # subtract chunklength from totalmins so we have enough for the last chunk
	    usable = totalmins - chunklength
	    if usable <= 0:
	        # the audio is too short to fit even one further chunk
	        return start

	    # multiply before dividing so that integer truncation does not compound
	    parts = (usable * i // nchunks for i in range(1, nchunks))
	    partchunks = tuple(
	        (i * ms_per_min, (i + chunklength) * ms_per_min) for i in parts
	    )
	    return start + partchunks


	# Slice every candidate file and export the result as wav; any failure
	# (for example a file that is not audio) is reported and the loop moves on.
	for orig, tup in filesandextensions:
	    print(orig)
	    n, ext = tup
	    # output name: spaces become dashes, lower-cased, commas removed
	    n = n.replace(" ","-").lower().replace(",","")

	    try:
	        raw = pydub.AudioSegment.from_file(orig, ext)
	        chunks = choose_chunks(raw, initial_chunk=(2,5), nchunks=5, chunklength=2)
	        sliced = takechunks(raw, chunks, 5)
	        print(chunks)
	        sliced.export(os.path.join(NEWFILESPATH, n+os.extsep+"wav"), format="wav")
	    except Exception as e:
	        # best effort: report the failing file and continue with the next one
	        print("%s %s" % (orig, e))
No results found