Created
April 23, 2025 22:48
-
-
Save kwindla/75c5772dd39ab0806329f53996015e2b to your computer and use it in GitHub Desktop.
OpenAI voice model detective story
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
from openai import AsyncOpenAI | |
from openai.helpers import LocalAudioPlayer | |
import wave | |
import numpy as np | |
# Shared async OpenAI client used by main() for all TTS requests.
openai = AsyncOpenAI()

# TTS style prompt applied to every narrator line (spoken with the "ballad" voice).
narrator_instructions = """
The speaker is narrating a detective story.
Voice Affect: Quietly dramatic.
Tone: Erudite. Learned. British accent.
Pacing: Slow and steady.
Emotion: Steady.
Pronunciation: Clear and precise.\n\nPauses: Brief pauses for narrative impact.
"""
def character_instructions(append):
    """Build the TTS style prompt for the story's female character.

    *append* is the tone direction (e.g. "Whispering.") appended to the
    fixed character description.
    """
    base = (
        "An American woman. A character in a detective story.\n"
        "Speaking quickly. Trying not to be overheard.\n"
        "Tone: "
    )
    return base + append
# Narrator lines, each paired with the shared narrator style prompt.
_NARRATOR_TEXTS = [
    "It was a dark and stormy night. The detective crept over to the window. A woman was talking to someone near the rosebeds.",
    "Just then, the clock struck midnight. The woman turned, startled, and seemed to look right into the darkened room at the detective.",
    "... she said, and then hurried around the corner.",
]
narrator = [(text, narrator_instructions) for text in _NARRATOR_TEXTS]
# Character lines, each paired with a per-line tone direction.
_CHARACTER_LINES = [
    ("shhh, we need to be quiet ...", "Whispering."),
    ("I don't think we have a chance of getting away with it.", "Fearful."),
    ("Is there someone there?", "Startled."),
]
character = [(text, character_instructions(tone)) for text, tone in _CHARACTER_LINES]
def _save_wav(segments: list[np.ndarray], path: str = "output.wav") -> None:
    """Concatenate float PCM segments and write them as a 16-bit mono WAV.

    Does nothing when *segments* is empty. Samples are clipped to [-1, 1]
    before the int16 cast: without clipping, any float sample that
    overshoots full scale wraps around on conversion and produces a loud
    click in the output.
    """
    if not segments:
        return
    combined = np.concatenate(segments, axis=0).flatten()
    int16_samples = (np.clip(combined, -1.0, 1.0) * 32767.0).astype(np.int16)
    with wave.open(path, "wb") as wf:
        wf.setnchannels(1)  # mono
        wf.setsampwidth(2)  # 16-bit
        wf.setframerate(24000)  # match SAMPLE_RATE used by LocalAudioPlayer
        wf.writeframes(int16_samples.tobytes())


async def main() -> None:
    """Play the character/narrator dialogue aloud, then save it to output.wav."""
    player = LocalAudioPlayer()
    all_segments: list[np.ndarray] = []  # audio buffers, in playback order

    async def speak(voice: str, text: str, instructions: str) -> None:
        """Synthesize one line, play it, and record its buffer for the WAV."""
        async with openai.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice=voice,
            input=text,
            instructions=instructions,
            response_format="pcm",
        ) as response:
            # NOTE(review): _tts_response_to_buffer is a private helper of
            # LocalAudioPlayer — confirm it survives openai SDK upgrades.
            buffer = await player._tts_response_to_buffer(response)
            await player.play(buffer)
            all_segments.append(buffer)

    # Alternate lines: the character speaks first, then the narrator responds.
    for (char_input, char_instructions), (narr_input, narr_instructions_) in zip(
        character, narrator
    ):
        await speak("alloy", char_input, char_instructions)  # character voice
        await speak("ballad", narr_input, narr_instructions_)  # narrator voice

    _save_wav(all_segments)
# Script entry point: drive the async playback loop to completion.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment