Skip to content

Instantly share code, notes, and snippets.

@thekid
Last active October 28, 2024 08:52
Show Gist options
  • Save thekid/678620dfb548e6a972cb268c8b5b7899 to your computer and use it in GitHub Desktop.
Save thekid/678620dfb548e6a972cb268c8b5b7899 to your computer and use it in GitHub Desktop.
CLI audio chat with ffmpeg and Azure AI realtime API
<?php namespace test;
use com\openai\realtime\RealtimeApi;
use util\cmd\Console;
use lang\Process;
// Braille spinner frames; the recording loop cycles through them to animate progress
const SPINNER= ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
/**
 * Builds a 44-byte RIFF/WAVE header for uncompressed PCM audio.
 *
 * The header consists of the RIFF descriptor, a 16-byte `fmt ` chunk with
 * audio format 1 (PCM) and the `data` chunk header. All numbers are packed
 * little-endian. Only the header is returned; the caller is responsible for
 * writing the sample data after it.
 *
 * @param  var $numChannels Number of channels, packed as 16-bit value
 * @param  var $sampleRate Samples per second, packed as 32-bit value
 * @param  var $bitsPerSample Bits per sample, packed as 16-bit value
 * @param  var $durationSeconds Duration used to calculate the announced data size
 * @return string
 */
function wave($numChannels, $sampleRate, $bitsPerSample, $durationSeconds) {
  $blockAlign= $numChannels * $bitsPerSample / 8;
  $byteRate= $sampleRate * $numChannels * $bitsPerSample / 8;
  $dataSize= $sampleRate * $durationSeconds * $blockAlign;

  // RIFF size field covers everything after itself: complete header plus data, minus the 8-byte preamble
  $riff= 'RIFF'.pack('V', 44 + $dataSize - 8).'WAVE';

  // Format chunk: chunk size 16, audio format 1 (PCM), followed by the stream parameters
  $format= 'fmt '.pack('VvvVVvv', 16, 1, $numChannels, $sampleRate, $byteRate, $blockAlign, $bitsPerSample);

  // Data chunk header only, samples follow separately
  $data= 'data'.pack('V', $dataSize);

  return $riff.$format.$data;
}
// Realtime endpoint of the Azure AI deployment to talk to
$remote= 'wss://example.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=gpt-4o-realtime-preview';

// getenv() yields FALSE for unset variables. Fail fast with a clear message
// instead of attempting to connect with an unusable "api-key" header.
$key= getenv('AZUREAI_API_KEY');
if (false === $key || '' === $key) {
  Console::$err->writeLine('Missing API key, please set the AZUREAI_API_KEY environment variable');
  exit(2);
}

$auth= ['api-key' => $key];
$api= new RealtimeApi($remote);
$api->connect($auth);

// Configure the session and print what transmit() returns. The voice is taken
// from the first command line argument and defaults to "alloy". Voices:
// 'amuch', 'dan', 'elan', 'marilyn', 'breeze', 'cove', 'ember', 'jupiter', 'alloy', 'echo', and 'shimmer'
// NOTE(review): 'turn_detection' => null presumably disables server-side turn
// detection, the input is committed explicitly further down - confirm.
Console::writeLine($api->transmit([
  'type' => 'session.update',
  'session' => ['voice' => $argv[1] ?? 'alloy', 'turn_detection' => null],
]));
// Record from the default PulseAudio device (24kHz, mono) with ffmpeg, which
// writes WAV to its standard output. Our own STDIN and STDERR are handed to
// ffmpeg, so the 'q' keypress announced below reaches it directly.
$record= new Process(
  'ffmpeg',
  ['-v', '0', '-f', 'pulse', '-sample_rate', '24000', '-channels', '1', '-i', 'default', '-f', 'wav', '-'],
  null,
  null,
  [0 => STDIN, 2 => STDERR]
);

// Discard headers
// NOTE(review): assumes ffmpeg emits exactly 44 header bytes and that a single
// read() returns all of them - confirm, the WAV muxer may add further chunks.
$record->out->read(44);

Console::write("\033[37;1mListening, press 'q' to end \033[0m[ ");

// $i drives the spinner animation, $sent counts the chunks actually sent.
// Reads may return an empty string (e.g. at EOF); these are not transmitted
// and thus must not show up in the summary below.
$i= 0;
$sent= 0;
do {
  Console::write("\010", SPINNER[$i++ % sizeof(SPINNER)]);
  $chunk= $record->out->read();
  if (strlen($chunk) > 0) {
    $api->send([
      'type' => 'input_audio_buffer.append',
      'audio' => base64_encode($chunk),
    ]);
    $sent++;
  }
} while (!$record->out->eof());

$record->out->close();
$record->close();
Console::writeLine("\010", $sent, ' chunk(s) transmitted]');

// Finalize the audio appended above
$api->transmit(['type' => 'input_audio_buffer.commit']);
// Request a response consisting of both audio and text
$api->transmit([
  'type' => 'response.create',
  'response' => ['modalities' => ['audio', 'text']],
]);

// Play the answer via ffplay reading from its standard input. A WAV header
// announcing 24kHz mono pcm16 is written first, the audio deltas follow.
// NOTE(review): the header announces a fixed 120 seconds (?!) - how ffplay
// treats longer or shorter streams is not verified here.
$proc= new Process('ffplay', ['-v', '0', '-nodisp', '-autoexit', '-'], null, null, [1 => STDOUT, 2 => STDERR]);
try {
  $proc->in->write(wave(1, 24_000, 16, 120));
  Console::writeLine("\033[34m");

  // Stop when no event is returned instead of looping forever on NULL.
  // NOTE(review): presumably receive() yields NULL once the connection is closed - confirm
  while (null !== ($event= $api->receive())) {
    switch ($event['type']) {
      case 'response.audio.delta':
        $proc->in->write(base64_decode($event['delta']));
        break;

      case 'response.audio_transcript.delta':
        Console::write($event['delta']);
        break;

      case 'error':
        Console::$err->writeLine();
        Console::$err->writeLine($event);
        break 2;

      case 'response.done':
        break 2;
    }
  }
} finally {

  // Also runs if an exception is raised above: stops the player, closes the
  // connection and resets the terminal color set before the loop.
  $proc->in->close();
  $proc->close();
  $api->close();
  Console::writeLine("\033[0m");
}
@thekid
Copy link
Author

thekid commented Oct 28, 2024

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment