Last active
October 15, 2024 16:55
-
-
Save MawKKe/c68c061cb0b69e5011a5cc419bb968fa to your computer and use it in GitHub Desktop.
Demo - Playback of generated audio stream with ffplay (ffmpeg) in real time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"2024-10-15 19:54:26.653 ffplay[98661:3541394] +[IMKClient subclass]: chose IMKClient_Legacy\n", | |
"2024-10-15 19:54:26.653 ffplay[98661:3541394] +[IMKInputSession subclass]: chose IMKInputSession_Legacy\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"total bytes: 1327104 (requested: 1323000), total samples: 663552.0\n", | |
"took: 15.47s\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "", | |
"text/plain": [ | |
"<Figure size 640x480 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import subprocess\n", | |
"import typing as t\n", | |
"import time\n", | |
"import io\n", | |
"import contextlib\n", | |
"from dataclasses import dataclass\n", | |
"\n", | |
"import numpy as np\n", | |
"import numpy.typing as npt\n", | |
"import pandas as pd\n", | |
"\n", | |
"\n", | |
"\"\"\"\n", | |
"Playback of generated audio stream with ffplay (ffmpeg) in real time\n", | |
"\n", | |
"Audio processing is implemented as a pipeline, which keeps memory\n", | |
"usage minimal.\n", | |
"\n", | |
"NOTE: assumes ffplay (ffmpeg) is installed and available via $PATH.\n", | |
"\n", | |
"Setup:\n", | |
" $ pip install numpy pandas\n", | |
"\n", | |
"Run:\n", | |
" Open the notebook in jupyter or VSCode (with jupyter extension\n", | |
" installed). Or copy-paste the code into a normal .py file and\n", | |
" run as usual.\n", | |
"\n", | |
"Author:\n", | |
" Markus H (MawKKe) 2024-10-15\n", | |
" https://github.com/MawKKe\n", | |
"\"\"\"\n", | |
"\n", | |
"\n", | |
def make_tone(freq: float, sample_rate: int, num_samples: int) -> npt.NDArray:
    """Return `num_samples` samples of a unit-amplitude sine wave.

    Sample k has the value sin(2*pi*freq*k/sample_rate), so the tone starts
    at phase zero.

    Args:
        freq: Tone frequency in Hz.
        sample_rate: Number of samples per second.
        num_samples: Length of the returned array.
    """
    sample_times = np.arange(0, num_samples) / sample_rate
    phase = 2 * np.pi * freq * sample_times
    return np.sin(phase)
"\n", | |
"\n", | |
def gen_audio_tone_chunked(freq: int, sample_rate: int, chunk_size: int = 4096):
    """Yield an endless, phase-continuous sine tone in fixed-size chunks.

    A lookup table of the tone is computed once and then read with an offset
    that wraps around every `sample_rate` samples (i.e. every second). The
    wrap-around is seamless only when `freq` is a whole number of Hz, because
    only then is the tone periodic over exactly one second.

    Args:
        freq: Tone frequency in Hz (should be an integer, see above).
        sample_rate: Number of samples per second; must be positive.
        chunk_size: Number of samples per yielded chunk; must be positive.

    Yields:
        Array views of exactly `chunk_size` samples each.

    Raises:
        ValueError: If `chunk_size` or `sample_rate` is not positive. As this
            is a generator, the error surfaces on the first `next()` call.
    """
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be positive, got {chunk_size}')
    if sample_rate <= 0:
        raise ValueError(f'sample_rate must be positive, got {sample_rate}')

    # The read offset is always < sample_rate, so a table of
    # sample_rate + chunk_size samples guarantees that every slice below is
    # full-length, whatever the chunk size is.
    table = make_tone(freq=freq, sample_rate=sample_rate, num_samples=sample_rate + chunk_size)

    offset = 0
    while True:
        yield table[offset : offset + chunk_size]
        offset = (offset + chunk_size) % sample_rate
"\n", | |
"\n", | |
def convert_to_s16le(audio: npt.NDArray[np.float32]) -> bytes:
    """Encode floating-point samples as signed 16-bit little-endian PCM.

    Samples are expected in the range [-1.0, 1.0], which maps to
    [-32767, 32767]. Values outside that range are clipped to full scale
    rather than being allowed to overflow the 16-bit integer cast.

    Args:
        audio: Array of floating-point samples.

    Returns:
        The encoded samples, two bytes per sample.
    """
    full_scale = 2**15 - 1
    clipped = np.clip(audio, -1.0, 1.0)
    return (clipped * full_scale).astype('<i2').tobytes()
"\n", | |
"\n", | |
@contextlib.contextmanager
def ffplay(audio_format: str, sample_rate: int) -> t.Iterator[t.Callable[[bytes], int]]:
    """Run ffplay as a child process that plays raw audio fed to its stdin.

    On leaving the `with` block, ffplay's stdin is closed and the function
    waits for the process to exit.

    Args:
        audio_format: Raw input format name passed to ffplay's `-f` option.
        sample_rate: Sample rate passed to ffplay's `-ar` option.

    Yields:
        A `player(chunk)` callable that writes `chunk` straight to ffplay's
        stdin (bypassing Python-side buffering) and returns the number of
        bytes written.

    Raises:
        FileNotFoundError: If the `ffplay` executable cannot be found.
    """
    cmd = [
        'ffplay',
        '-hide_banner',
        '-loglevel',
        'error',
        '-f',
        str(audio_format),
        '-ar',
        str(sample_rate),
        '-vn',
        '-autoexit',
        '-i',
        '-',
    ]
    # Spawn outside the try block: if this fails there is nothing to clean
    # up, and the caller should see the real error (e.g. FileNotFoundError).
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    # suppress mypy complaint 'NoneType has no .write'
    stdin = t.cast(io.BufferedWriter, proc.stdin)

    def player(chunk: bytes) -> int:
        # return number of bytes written
        return stdin.raw.write(chunk)

    try:
        yield player
    finally:
        try:
            # close() flushes anything still buffered before closing the pipe.
            stdin.close()
        except BrokenPipeError:
            # ffplay has already exited; there is nobody left to flush to.
            pass
        finally:
            # Always reap the child, even if closing the pipe failed.
            proc.wait()
"\n", | |
"\n", | |
@dataclass
class AudioFormat:
    """Description of a raw audio sample format that can be fed to ffplay."""

    # Format name; main() passes it to ffplay() as `audio_format`.
    name: str
    # Size of one encoded sample, in bytes.
    sample_size: int
    # Function that encodes an array of samples into raw bytes of this format.
    converter: t.Callable[[npt.NDArray], bytes]


# Signed 16-bit little-endian PCM: 2 bytes per sample.
S16LE = AudioFormat('s16le', 2, convert_to_s16le)
"\n", | |
"\n", | |
def main(
    volume: float = 0.1,
    chunk_size: int = 4096,
    sample_rate: int = 44100,
    freq: int = 440,
    duration: float = 15,
) -> pd.DataFrame:
    """Play a sine tone through ffplay and record how fast the data was accepted.

    Args:
        volume: Linear gain applied to the unit-amplitude tone.
        chunk_size: Number of samples written to ffplay per write call.
        sample_rate: Playback sample rate in Hz.
        freq: Tone frequency in Hz.
        duration: Requested playback length in seconds.

    Returns:
        A DataFrame with one row per written chunk and the columns
        't' (seconds since playback start), 'bytes' and 'samples'
        (cumulative totals written so far).
    """
    # infinite stream of audio samples of sine at 'freq', chunked
    audio_chunks = gen_audio_tone_chunked(freq=freq, sample_rate=sample_rate, chunk_size=chunk_size)

    def run_player(chunk_stream, afmt: AudioFormat, max_samples: int):
        """Feed chunks to ffplay, yielding one progress record per write."""
        requested_bytes = afmt.sample_size * max_samples
        # perf_counter is monotonic, so elapsed times cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        tot_bytes = 0
        with ffplay(audio_format=afmt.name, sample_rate=sample_rate) as play:
            for chunk in chunk_stream:
                # Whole chunks are written, so the total may overshoot the
                # request by up to one chunk.
                if tot_bytes >= requested_bytes:
                    break

                tot_bytes += play(afmt.converter(volume * chunk))

                yield {
                    't': time.perf_counter() - start,
                    'bytes': tot_bytes,
                    'samples': tot_bytes // afmt.sample_size,
                }

        # Reached only after the with-block exits, i.e. after ffplay has finished.
        print(
            f'total bytes: {tot_bytes} (requested: {requested_bytes}), '
            f'total samples: {tot_bytes // afmt.sample_size}'
        )

    start_t = time.perf_counter()

    # DataFrame.from_records is the "sink" that consumes the generator pipeline
    df = pd.DataFrame.from_records(
        run_player(chunk_stream=audio_chunks, afmt=S16LE, max_samples=int(duration * sample_rate))
    )

    end_t = time.perf_counter()

    print(f'took: {end_t-start_t:.2f}s')

    return df
"\n", | |
"\n", | |
def plot_stdin_datarate(df: pd.DataFrame) -> None:
    """Plot the cumulative number of samples written to ffplay over time.

    Reads the 't' and 'samples' columns of `df` and draws them as a line
    plot with a title, axis labels and a grid.
    """
    axes = df.plot(x='t', y='samples')
    axes.grid(visible=True)
    axes.set_ylabel('Samples')
    axes.set_xlabel('Time(s)')
    axes.set_title('ffplay stdin datarate')
"\n", | |
"\n", | |
# Run the playback demo (returns once ffplay has exited), then plot how the
# cumulative number of written samples grew over time.
df = main()
plot_stdin_datarate(df)
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Clearly there is an initial burst transfer of data into the ffplay process; after the buffer is saturated,\n", | |
"the writes start to block in the producer (Python). Roughly eyeballing the slope of the figure between\n", | |
"$2..8s$ results in 44k-45k samples per second, which matches the 44.1 kHz audio sink rate. Neat 🙂" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.13.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment