andreaschandra · November 16, 2018 04:13
diff --git a/main4.py b/main4.py
 def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
    """Group frames into voiced segments and yield each segment as bytes.

    A bounded sliding window of the most recent frames drives a two-state
    machine. While NOTTRIGGERED, frames are only remembered in the window;
    once more than 90% of the window's capacity is voiced, the window's
    frames seed a new segment and the state becomes TRIGGERED. While
    TRIGGERED, every frame is added to the segment; once more than 90% of
    the window's capacity is unvoiced, the segment is yielded and the state
    returns to NOTTRIGGERED.

    Args:
        sample_rate: Passed through unchanged to ``vad.is_speech``.
        frame_duration_ms: Duration of one frame; together with
            ``padding_duration_ms`` it sizes the sliding window.
        padding_duration_ms: Amount of audio the sliding window covers.
        vad: Object exposing ``is_speech(data, sample_rate)``.
        frames: Iterable of objects with ``bytes``, ``timestamp`` and
            ``duration`` attributes.

    Yields:
        The concatenated ``bytes`` of the frames in each voiced segment,
        including a final unfinished segment when the input runs out.

    Side effects:
        Writes a trace to ``sys.stdout``: ``1``/``0`` per frame, ``+(ts)``
        when a segment starts, ``-(ts)`` when it ends, and a trailing
        newline once the input is exhausted.
    """
    # Local import keeps this function self-contained.
    import collections

    # The window holds as many frames as fit into the padding duration.
    # Without this definition `ring_buffer` was an undefined name.
    num_padding_frames = int(padding_duration_ms / frame_duration_ms)
    ring_buffer = collections.deque(maxlen=num_padding_frames)
    # Loop-invariant: 90% of the window capacity.
    threshold = 0.9 * ring_buffer.maxlen

    triggered = False
    voiced_frames = []
    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)

        sys.stdout.write('1' if is_speech else '0')
        if not triggered:
            ring_buffer.append((frame, is_speech))
            num_voiced = sum(1 for _, speech in ring_buffer if speech)
            # Enter TRIGGERED once more than 90% of the window is voiced.
            if num_voiced > threshold:
                triggered = True
                sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
                # The segment starts with the audio already sitting in the
                # window, so the onset of speech is not cut off.
                voiced_frames.extend(f for f, _ in ring_buffer)
                ring_buffer.clear()
        else:
            # TRIGGERED: keep collecting audio and track recent frames so
            # the end of the segment can be detected.
            voiced_frames.append(frame)
            ring_buffer.append((frame, is_speech))
            num_unvoiced = sum(1 for _, speech in ring_buffer if not speech)
            # Leave TRIGGERED once more than 90% of the window is unvoiced
            # and hand over whatever audio has been collected.
            if num_unvoiced > threshold:
                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
                triggered = False
                yield b''.join([f.bytes for f in voiced_frames])
                ring_buffer.clear()
                voiced_frames = []
    if triggered:
        # `frame` is bound here: TRIGGERED implies at least one iteration.
        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
    sys.stdout.write('\n')
    # Yield any leftover voiced audio when the input runs out.
    if voiced_frames:
        yield b''.join([f.bytes for f in voiced_frames])
	def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
	    """Group frames into voiced segments and yield each segment as bytes.

	    A bounded sliding window of the most recent frames drives a two-state
	    machine. While NOTTRIGGERED, frames are only remembered in the window;
	    once more than 90% of the window's capacity is voiced, the window's
	    frames seed a new segment and the state becomes TRIGGERED. While
	    TRIGGERED, every frame is added to the segment; once more than 90% of
	    the window's capacity is unvoiced, the segment is yielded and the
	    state returns to NOTTRIGGERED.

	    Args:
	        sample_rate: Passed through unchanged to ``vad.is_speech``.
	        frame_duration_ms: Duration of one frame; together with
	            ``padding_duration_ms`` it sizes the sliding window.
	        padding_duration_ms: Amount of audio the sliding window covers.
	        vad: Object exposing ``is_speech(data, sample_rate)``.
	        frames: Iterable of objects with ``bytes``, ``timestamp`` and
	            ``duration`` attributes.

	    Yields:
	        The concatenated ``bytes`` of the frames in each voiced segment,
	        including a final unfinished segment when the input runs out.

	    Side effects:
	        Writes a trace to ``sys.stdout``: ``1``/``0`` per frame, ``+(ts)``
	        when a segment starts, ``-(ts)`` when it ends, and a trailing
	        newline once the input is exhausted.
	    """
	    # Local import keeps this function self-contained.
	    import collections

	    # The window holds as many frames as fit into the padding duration.
	    # Without this definition `ring_buffer` was an undefined name.
	    num_padding_frames = int(padding_duration_ms / frame_duration_ms)
	    ring_buffer = collections.deque(maxlen=num_padding_frames)
	    # Loop-invariant: 90% of the window capacity.
	    threshold = 0.9 * ring_buffer.maxlen

	    triggered = False
	    voiced_frames = []
	    for frame in frames:
	        is_speech = vad.is_speech(frame.bytes, sample_rate)

	        sys.stdout.write('1' if is_speech else '0')
	        if not triggered:
	            ring_buffer.append((frame, is_speech))
	            num_voiced = sum(1 for _, speech in ring_buffer if speech)
	            # Enter TRIGGERED once more than 90% of the window is voiced.
	            if num_voiced > threshold:
	                triggered = True
	                sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
	                # The segment starts with the audio already sitting in
	                # the window, so the onset of speech is not cut off.
	                voiced_frames.extend(f for f, _ in ring_buffer)
	                ring_buffer.clear()
	        else:
	            # TRIGGERED: keep collecting audio and track recent frames
	            # so the end of the segment can be detected.
	            voiced_frames.append(frame)
	            ring_buffer.append((frame, is_speech))
	            num_unvoiced = sum(1 for _, speech in ring_buffer if not speech)
	            # Leave TRIGGERED once more than 90% of the window is
	            # unvoiced and hand over whatever audio has been collected.
	            if num_unvoiced > threshold:
	                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
	                triggered = False
	                yield b''.join([f.bytes for f in voiced_frames])
	                ring_buffer.clear()
	                voiced_frames = []
	    if triggered:
	        # `frame` is bound here: TRIGGERED implies at least one iteration.
	        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
	    sys.stdout.write('\n')
	    # Yield any leftover voiced audio when the input runs out.
	    if voiced_frames:
	        yield b''.join([f.bytes for f in voiced_frames])
No results found