SK Media Capture to Audio Graph's device output on the HL2
namespace SKAudioGraph
{
    using StereoKit;
    using System;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Threading;
    using System.Threading.Tasks;
    using Windows.Foundation;
    using Windows.Media;
    using Windows.Media.Audio;
    using Windows.Media.Capture;
    using Windows.Media.Capture.Frames;
    using Windows.Media.MediaProperties;
    using Windows.Media.Render;
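
    // Pipeline overview: MediaCapture delivers microphone audio frames through a
    // MediaFrameReader; each frame is copied (downmixed to a single channel if needed)
    // into an AudioGraph AudioFrameInputNode, which is wired to the default device
    // output. A StereoKit window provides the Play/Stop button and a gain slider.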
    internal class Program
    {
        private static AudioGraph graph;
        private static AudioDeviceOutputNode deviceOutputNode;
        private static AudioFrameInputNode frameInputNode;

        private enum MenuState
        {
            Main,
            StartingAudio,
            StoppingAudio,
        }

        // The UI loop and the StartAsync/StopAsync continuations (which run on the
        // thread pool) both touch this state, so reads and writes go through Interlocked.
        private static MenuState State
        {
            // CompareExchange with identical comparand and value is a lock-free read
            get { return (MenuState)Interlocked.CompareExchange(ref state, 0, 0); }
            set { Interlocked.Exchange(ref state, (int)value); }
        }
        private static int state = 0;

        private static double outgoingGain = 1;
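
        // Main initializes StereoKit, builds the capture pipeline and the audio graph
        // once at startup, then runs the UI loop until Exit is pressed.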
        static void Main(string[] args)
        {
            // Initialize StereoKit
            SKSettings settings = new SKSettings
            {
                appName = "SKAudioGraph",
                assetsFolder = "Assets",
            };
            if (!SK.Initialize(settings))
                Environment.Exit(1);

            // Place the menu slightly ahead and to the right of the user, facing the head
            Vec3 menuPosition = Input.Head.position + Input.Head.Forward * 0.6f + Vec3.Right * 0.15f;
            Pose menuPose = new Pose(menuPosition, Quat.LookAt(menuPosition, Input.Head.position));
            Vec2 menuSize = new Vec2(20, 10) * U.cm;
            string audioText = "Play";

            // Blocking on the async setup is acceptable here: it happens once, before
            // the render loop starts.
            (AudioEncodingProperties audioEncodingProperties, MediaFrameReader audioFrameReader) = InitializeMediaCaptureAsync().GetAwaiter().GetResult();
            CreateAudioGraph(audioEncodingProperties).GetAwaiter().GetResult();
            var audioFrameHandler = CreateMediaFrameHandler();
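            // The handler is kept in a local so the same delegate instance can be
            // unsubscribed again when the user presses Stop.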

            // Core application loop
            while (SK.Step(() =>
            {
                UI.WindowBegin("Audio Stream", ref menuPose, menuSize, moveType: UIMove.Exact);

                switch (State)
                {
                    case MenuState.Main:
                        if (UI.Button(audioText))
                        {
                            if (audioText.Equals("Play"))
                            {
                                State = MenuState.StartingAudio;
                                audioFrameReader.StartAsync().AsTask().ContinueWith(audioStatus =>
                                {
                                    if (audioStatus.Result != MediaFrameReaderStartStatus.Success)
                                    {
                                        throw new InvalidOperationException($"Audio stream media frame reader failed to start: {audioStatus.Result}");
                                    }
                                    frameInputNode.Start();
                                    audioFrameReader.FrameArrived += audioFrameHandler;
                                    audioText = "Stop";
                                    State = MenuState.Main;
                                });
                            }
                            else
                            {
                                State = MenuState.StoppingAudio;
                                audioFrameReader.StopAsync().AsTask().ContinueWith(_ =>
                                {
                                    frameInputNode.Stop();
                                    audioFrameReader.FrameArrived -= audioFrameHandler;
                                    audioText = "Play";
                                    State = MenuState.Main;
                                });
                            }
                        }
                        break;
                    case MenuState.StartingAudio:
                        UI.Button("Play"); // inert placeholder while the reader starts
                        break;
                    case MenuState.StoppingAudio:
                        UI.Button("Stop"); // inert placeholder while the reader stops
                        break;
                }

                UI.Space(2 * U.cm);

                float sliderMin = 0;
                float sliderMax = 8;
                float sliderStep = 0.5f;
                float sliderWidth = menuSize.x;
                UI.PanelBegin(UIPad.None);
                if (UI.HSlider("Gain", ref outgoingGain, sliderMin, sliderMax, sliderStep, sliderWidth, UIConfirm.Pinch))
                {
                    frameInputNode.OutgoingGain = outgoingGain;
                }
                UI.PanelEnd();

                // Slider position normalized to [-0.5, 0.5] for placing the gain label
                float percentNormalized = (float)outgoingGain / sliderMax - 0.5f;
                Text.Add(
                    (Math.Truncate(outgoingGain * 10) / 10).ToString("0.0"),
                    Matrix.TS(UI.LayoutLast.center, V.XXX(0.5f)),
                    offX: -percentNormalized * UI.LayoutLast.dimensions.x * 2,
                    offY: -UI.LayoutLast.dimensions.y * 1.5f,
                    offZ: -0.001f
                );

                UI.Space(2 * U.cm);
                if (UI.Button("Exit"))
                {
                    SK.Quit();
                }
                UI.WindowEnd();
            }))
            {
            }

            SK.Shutdown();
        }
        /// <summary>
        /// Initializes the MediaCapture object and creates a MediaFrameReader for the audio capture stream.
        /// </summary>
        /// <returns>The encoding properties of the audio source and the frame reader for it.</returns>
        private static async Task<(AudioEncodingProperties, MediaFrameReader)> InitializeMediaCaptureAsync()
        {
            // Request an audio-only capture configuration
            var settings = new MediaCaptureInitializationSettings
            {
                AudioProcessing = AudioProcessing.Default,
                MediaCategory = MediaCategory.Speech,
                StreamingCaptureMode = StreamingCaptureMode.Audio,
                MemoryPreference = MediaCaptureMemoryPreference.Cpu,
                SharingMode = MediaCaptureSharingMode.ExclusiveControl,
            };

            // Initialize the MediaCapture object
            var mediaCapture = new MediaCapture();
            await mediaCapture.InitializeAsync(settings);

            AudioEncodingProperties audioEncodingProperties = null;
            MediaFrameReader audioFrameReader = null;
            foreach (var sourceInfo in mediaCapture.FrameSources
                .Where(si => si.Value.Info.MediaStreamType == MediaStreamType.Audio))
            {
                var audioFrameSource = sourceInfo.Value;
                audioFrameReader = await mediaCapture.CreateFrameReaderAsync(audioFrameSource);
                audioEncodingProperties = audioFrameSource.CurrentFormat.AudioEncodingProperties;
            }

            if (audioFrameReader == null)
            {
                throw new InvalidOperationException("Could not create a frame reader for the requested audio source.");
            }
            return (audioEncodingProperties, audioFrameReader);
        }
        /// <summary>
        /// Creates an event handler that handles the FrameArrived event of the MediaFrameReader.
        /// </summary>
        /// <returns>The event handler.</returns>
        private static TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> CreateMediaFrameHandler()
        {
            return (sender, args) =>
            {
                // TryAcquireLatestFrame returns the newest unread frame (or null), so a
                // slow handler skips stale frames rather than queueing them up.
                using var frame = sender.TryAcquireLatestFrame();
                if (frame != null)
                {
                    using MediaFrameReference mediaFrame = frame.AudioMediaFrame.FrameReference;
                    using AudioFrame audioFrame = frame.AudioMediaFrame.GetAudioFrame();
                    AudioEncodingProperties audioEncodingProperties = mediaFrame.AudioMediaFrame.AudioEncodingProperties;
                    unsafe
                    {
                        using AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Read);
                        using IMemoryBufferReference reference = buffer.CreateReference();
                        ((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out byte* audioDataIn, out uint capacity);

                        uint frameDurMs = (uint)mediaFrame.Duration.TotalMilliseconds;
                        uint sampleRate = audioEncodingProperties.SampleRate;
                        uint sampleCount = (frameDurMs * sampleRate) / 1000;
                        uint numAudioChannels = audioEncodingProperties.ChannelCount;
                        uint bytesPerSample = audioEncodingProperties.BitsPerSample / 8;

                        // Output buffer size is (number of samples) * (size of each sample), single channel
                        byte[] audioDataOut = new byte[sampleCount * bytesPerSample];
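                        // For example, a 10 ms frame of 32-bit float audio at 48 kHz:
                        // (10 * 48000) / 1000 = 480 samples, 480 * 4 = 1920 bytes of mono output.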

                        // Downmix to a single channel by keeping only the first channel
                        if (numAudioChannels > 1)
                        {
                            // Multi-channel samples arrive interleaved, so step through the
                            // input one sample group at a time and copy just the first channel
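                            // e.g., 2-channel float input [L0 R0 L1 R1 ...] -> output [L0 L1 ...]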
                            uint inPos = 0;
                            uint outPos = 0;
                            while (outPos < audioDataOut.Length)
                            {
                                byte* src = &audioDataIn[inPos];
                                fixed (byte* dst = &audioDataOut[outPos])
                                {
                                    Buffer.MemoryCopy(src, dst, bytesPerSample, bytesPerSample);
                                }
                                inPos += bytesPerSample * numAudioChannels;
                                outPos += bytesPerSample;
                            }
                        }
                        else
                        {
                            // Single channel: copy the frame through unchanged
                            byte* src = audioDataIn;
                            fixed (byte* dst = audioDataOut)
                            {
                                Buffer.MemoryCopy(src, dst, audioDataOut.Length, audioDataOut.Length);
                            }
                        }

                        AudioFrame audioData = RetrieveAudioData(audioDataOut);
                        frameInputNode.AddFrame(audioData);
                    }
                }
            };
        }
        private static async Task CreateAudioGraph(AudioEncodingProperties audioEncodingProperties)
        {
            // Create an AudioGraph with default settings
            AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Speech);
            CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
            if (result.Status != AudioGraphCreationStatus.Success)
            {
                // Cannot create graph
                Log.Err($"AudioGraph creation error: {result.Status}");
                return;
            }
            graph = result.Graph;

            // Create a device output node on the default render device
            CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await graph.CreateDeviceOutputNodeAsync();
            if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
            {
                // Cannot create device output node; bail out rather than dereference a null node
                Log.Err($"Audio device output unavailable: {deviceOutputNodeResult.Status}");
                return;
            }
            deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
            Log.Info("Device Output Node successfully created");

            // Create the FrameInputNode with the same format as the input device
            frameInputNode = graph.CreateFrameInputNode(audioEncodingProperties);
            frameInputNode.AddOutgoingConnection(deviceOutputNode);

            // Initialize the frame input node in the stopped state
            frameInputNode.Stop();

            // Start the graph; Play/Stop only toggles the frame input node afterwards
            graph.Start();
        }
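
        // Note: a frame input node created with these encoding properties expects the
        // frames pushed into it to match that format. The HL2 microphone typically
        // delivers single-channel float PCM, which is what the handler above produces.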
        /// <summary>
        /// Copies a mono byte buffer into a new AudioFrame that can be pushed to the frame input node.
        /// </summary>
        unsafe private static AudioFrame RetrieveAudioData(byte[] audioOut)
        {
            // Buffer size is (number of samples) * (size of each sample) for mono audio;
            // for multi-channel, multiply by the number of channels
            uint bufferSize = (uint)audioOut.Length;
            AudioFrame frame = new AudioFrame(bufferSize);
            using AudioBuffer buffer = frame.LockBuffer(AudioBufferAccessMode.Write);
            using IMemoryBufferReference reference = buffer.CreateReference();
            ((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out byte* dataInBytes, out uint capacityInBytes);

            // Cast to float since the underlying data is 32-bit float PCM
            float* dataInFloat = (float*)dataInBytes;
            fixed (byte* src = audioOut)
            {
                Buffer.MemoryCopy(src, dataInFloat, bufferSize, bufferSize);
            }
            return frame;
        }
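
        // Note: the AudioFrame(uint) constructor allocates a buffer of exactly that many
        // bytes; with 32-bit float samples the size is always a multiple of 4 here.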
        /// <summary>
        /// Provides unsafe native APIs.
        /// </summary>
        private static class UnsafeNative
        {
            /// <summary>
            /// Provides access to an IMemoryBuffer as an array of bytes.
            /// </summary>
            [ComImport]
            [Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
            [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
            public unsafe interface IMemoryBufferByteAccess
            {
                /// <summary>
                /// Gets an IMemoryBuffer as an array of bytes.
                /// </summary>
                /// <param name="buffer">A pointer to a byte array containing the buffer data.</param>
                /// <param name="capacity">The number of bytes in the returned array.</param>
                void GetBuffer(out byte* buffer, out uint capacity);
            }
        }
    }
}
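
Note: capturing microphone audio with MediaCapture in a UWP app requires the Microphone capability to be declared in Package.appxmanifest; without it, InitializeAsync typically fails with an access-denied error.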