//
//  RecordAudio.swift
//
//  This is a Swift class (updated for Swift 5)
//  that uses the iOS RemoteIO Audio Unit
//  to record audio input samples,
//  (should be instantiated as a singleton object.)
//
//  Created by Ronald Nicholson on 10/21/16.
//  Copyright © 2017,2019 HotPaw Productions. All rights reserved.
//  http://www.nicholson.com/rhn/
//  Distribution permission: BSD 2-clause license
//
import Foundation
import AVFoundation
import AudioUnit

var gTmp0 = 0   // global scratch variable for inspecting OSStatus results while debugging

// call setupAudioSessionForRecording() during controlling view load
// call startRecording() to start recording in a later UI call
final class RecordAudio: NSObject {

    var audioUnit: AudioUnit? = nil

    var micPermission = false
    var sessionActive = false
    var isRecording = false

    var sampleRate: Double = 44100.0    // default audio sample rate

    let circBuffSize = 32768            // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768)    // for incoming samples
    var circInIdx: Int = 0
    var audioLevel: Float = 0.0

    private var hwSRate = 48000.0       // guess of device hardware sample rate
    private var micPermissionDispatchToken = 0
    private var interrupted = false     // for restart from audio interruption notification

    func startRecording() {
        if isRecording { return }
        startAudioSession()
        if sessionActive {
            startAudioUnit()
        }
    }

    var numberOfChannels: Int = 2

    private let outputBus: UInt32 = 0
    private let inputBus: UInt32 = 1

    func startAudioUnit() {
        var err: OSStatus = noErr

        if self.audioUnit == nil {
            setupAudioUnit()    // setup once
        }
        guard let au = self.audioUnit
            else { return }

        err = AudioUnitInitialize(au)
        gTmp0 = Int(err)
        if err != noErr { return }
        err = AudioOutputUnitStart(au)  // start
        gTmp0 = Int(err)
        if err == noErr {
            isRecording = true
        }
    }

    func startAudioSession() {
        if (sessionActive == false) {
            // set and activate Audio Session
            do {
                let audioSession = AVAudioSession.sharedInstance()

                if (micPermission == false) {
                    if (micPermissionDispatchToken == 0) {
                        micPermissionDispatchToken = 1
                        audioSession.requestRecordPermission({ (granted: Bool) -> Void in
                            if granted {
                                self.micPermission = true
                                return
                                // check for this flag and call from UI loop if needed
                            } else {
                                gTmp0 += 1
                                // dispatch in main/UI thread an alert
                                // informing that mic permission is not switched on
                            }
                        })
                    }
                }
                if micPermission == false { return }

                try audioSession.setCategory(AVAudioSession.Category.record)
                // choose 44100 or 48000 based on hardware rate
                // sampleRate = 44100.0
                var preferredIOBufferDuration = 0.0058          // 5.8 milliseconds = 256 samples
                hwSRate = audioSession.sampleRate               // get native hardware rate
                if hwSRate == 48000.0 { sampleRate = 48000.0 }  // set session to hardware rate
                if hwSRate == 48000.0 { preferredIOBufferDuration = 0.0053 }
                let desiredSampleRate = sampleRate
                try audioSession.setPreferredSampleRate(desiredSampleRate)
                try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

                NotificationCenter.default.addObserver(
                    forName: AVAudioSession.interruptionNotification,
                    object: nil,
                    queue: nil,
                    using: myAudioSessionInterruptionHandler)

                try audioSession.setActive(true)
                sessionActive = true
            } catch /* let error as NSError */ {
                // handle error here
            }
        }
    }

    private func setupAudioUnit() {

        var componentDesc: AudioComponentDescription
            = AudioComponentDescription(
                componentType:          OSType(kAudioUnitType_Output),
                componentSubType:       OSType(kAudioUnitSubType_RemoteIO),
                componentManufacturer:  OSType(kAudioUnitManufacturer_Apple),
                componentFlags:         UInt32(0),
                componentFlagsMask:     UInt32(0))

        var osErr: OSStatus = noErr

        let component: AudioComponent! = AudioComponentFindNext(nil, &componentDesc)

        var tempAudioUnit: AudioUnit?
        osErr = AudioComponentInstanceNew(component, &tempAudioUnit)
        self.audioUnit = tempAudioUnit

        guard let au = self.audioUnit
            else { return }

        // Enable I/O for input.
        var one_ui32: UInt32 = 1
        osErr = AudioUnitSetProperty(au,
                                     kAudioOutputUnitProperty_EnableIO,
                                     kAudioUnitScope_Input,
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))
        // Set format to 32-bit Floats, linear PCM
        let nc = 2      // 2 channel stereo
        var streamFormatDesc: AudioStreamBasicDescription = AudioStreamBasicDescription(
            mSampleRate:        Double(sampleRate),
            mFormatID:          kAudioFormatLinearPCM,
            mFormatFlags:       (kAudioFormatFlagsNativeFloatPacked),
            mBytesPerPacket:    UInt32(nc * MemoryLayout<Float32>.size),
            mFramesPerPacket:   1,
            mBytesPerFrame:     UInt32(nc * MemoryLayout<Float32>.size),
            mChannelsPerFrame:  UInt32(nc),
            mBitsPerChannel:    UInt32(8 * (MemoryLayout<Float32>.size)),
            mReserved:          UInt32(0)
        )
        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Input, outputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Output,
                                     inputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

        var inputCallbackStruct
            = AURenderCallbackStruct(inputProc: recordingCallback,
                                     inputProcRefCon:
                                        UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))

        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioOutputUnitProperty_SetInputCallback),
                                     AudioUnitScope(kAudioUnitScope_Global),
                                     inputBus,
                                     &inputCallbackStruct,
                                     UInt32(MemoryLayout<AURenderCallbackStruct>.size))

        // Ask CoreAudio to allocate buffers for us on render.
        // Is this true by default?
        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioUnitProperty_ShouldAllocateBuffer),
                                     AudioUnitScope(kAudioUnitScope_Output),
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))

        gTmp0 = Int(osErr)
    }

    let recordingCallback: AURenderCallback = { (
        inRefCon,
        ioActionFlags,
        inTimeStamp,
        inBusNumber,
        frameCount,
        ioData) -> OSStatus in

        let audioObject = unsafeBitCast(inRefCon, to: RecordAudio.self)
        var err: OSStatus = noErr

        // set mData to nil, AudioUnitRender() should be allocating buffers
        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: UInt32(2),
                mDataByteSize: 16,
                mData: nil))

        if let au = audioObject.audioUnit {
            err = AudioUnitRender(au,
                                  ioActionFlags,
                                  inTimeStamp,
                                  inBusNumber,
                                  frameCount,
                                  &bufferList)
        }

        audioObject.processMicrophoneBuffer(inputDataList: &bufferList,
                                            frameCount: UInt32(frameCount))
        return 0
    }

    func processMicrophoneBuffer(   // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32)
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        // Microphone Input Analysis
        // let data = UnsafePointer<Int16>(mBuffers.mData)
        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        if let bptr = bufferPointer {
            let dataArray = bptr.assumingMemoryBound(to: Float.self)
            var sum: Float = 0.0
            var j = self.circInIdx
            let m = self.circBuffSize
            for i in 0..<(count/2) {
                let x = Float(dataArray[i+i  ])     // copy left  channel sample
                let y = Float(dataArray[i+i+1])     // copy right channel sample
                self.circBuffer[j    ] = x
                self.circBuffer[j + 1] = y
                j += 2 ; if j >= m { j = 0 }        // into circular buffer
                sum += x * x + y * y
            }
            self.circInIdx = j      // circular index will always be less than size
            // measuredMicVol_1 = sqrt( Float(sum) / Float(count) )  // scaled volume
            if sum > 0.0 && count > 0 {
                let tmp = 5.0 * (logf(sum / Float(count)) + 20.0)
                let r: Float = 0.2
                audioLevel = r * tmp + (1.0 - r) * audioLevel
            }
        }
    }
    func stopRecording() {
        if let au = self.audioUnit {
            AudioOutputUnitStop(au)     // stop the input unit before uninitializing it
            AudioUnitUninitialize(au)
        }
        isRecording = false
    }
    func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interuptionDict = notification.userInfo
        if let interuptionType = interuptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interuptionVal = AVAudioSession.InterruptionType(
                rawValue: (interuptionType as AnyObject).uintValue)
            if (interuptionVal == AVAudioSession.InterruptionType.began) {
                if (isRecording) {
                    stopRecording()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        sessionActive = false
                    } catch {
                    }
                    interrupted = true
                }
            } else if (interuptionVal == AVAudioSession.InterruptionType.ended) {
                if (interrupted) {
                    // potentially restart here
                }
            }
        }
    }
}
// end of class RecordAudio
final class RecordAudio_v2: NSObject {

    var auAudioUnit: AUAudioUnit! = nil

    var enableRecording = true
    var audioSessionActive = false
    var audioSetupComplete = false
    var isRecording = false

    var sampleRate: Double = 48000.0    // desired audio sample rate

    let circBuffSize = 32768            // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768)
    var circInIdx: Int = 0              // sample input  index
    var circOutIdx: Int = 0             // sample output index

    var audioLevel: Float = 0.0

    private var micPermissionRequested = false
    private var micPermissionGranted = false

    // for restart from audio interruption notification
    private var audioInterrupted = false

    private var renderBlock: AURenderBlock? = nil

    func startRecording() {

        if isRecording { return }

        if audioSessionActive == false {
            // configure and activate Audio Session, this might change the sampleRate
            setupAudioSessionForRecording()
        }
        guard micPermissionGranted && audioSessionActive else { return }

        let audioFormat = AVAudioFormat(
            commonFormat: AVAudioCommonFormat.pcmFormatInt16,   // pcmFormatInt16, pcmFormatFloat32,
            sampleRate: Double(sampleRate),                     // 44100.0 48000.0
            channels: AVAudioChannelCount(2),                   // 1 or 2
            interleaved: true)                                  // true for interleaved stereo

        if (auAudioUnit == nil) {
            setupRemoteIOAudioUnitForRecord(audioFormat: audioFormat!)
        }

        renderBlock = auAudioUnit.renderBlock   // returns AURenderBlock()

        if (   enableRecording
            && micPermissionGranted
            && audioSetupComplete
            && audioSessionActive
            && isRecording == false) {

            auAudioUnit.isInputEnabled = true

            auAudioUnit.outputProvider = {  // AURenderPullInputBlock()
                (actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in

                if let block = self.renderBlock {   // AURenderBlock?
                    let err: OSStatus = block(actionFlags,
                                              timestamp,
                                              frameCount,
                                              1,
                                              inputData,
                                              .none)
                    if err == noErr {
                        // save samples from current input buffer to circular buffer
                        self.recordMicrophoneInputSamples(
                            inputDataList: inputData,
                            frameCount: UInt32(frameCount))
                    }
                }
                let err2: AUAudioUnitStatus = noErr
                return err2
            }

            do {
                circInIdx = 0   // initialize circular buffer pointers
                circOutIdx = 0
                try auAudioUnit.allocateRenderResources()
                try auAudioUnit.startHardware()     // equivalent to AudioOutputUnitStart ???
                isRecording = true
            } catch {
                // placeholder for error handling
            }
        }
    }

    func stopRecording() {

        if (isRecording) {
            auAudioUnit.stopHardware()
            isRecording = false
        }
        if (audioSessionActive) {
            let audioSession = AVAudioSession.sharedInstance()
            do {
                try audioSession.setActive(false)
            } catch /* let error as NSError */ {
            }
            audioSessionActive = false
        }
    }
    private func recordMicrophoneInputSamples(  // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32)
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        var j = self.circInIdx          // current circular array input index
        let n = self.circBuffSize
        var audioLevelSum: Float = 0.0
        if let bptr = bufferPointer?.assumingMemoryBound(to: Int16.self) {
            for i in 0..<(count/2) {
                // Save samples in circular buffer for later processing
                let x = Float(bptr[i+i  ])
                let y = Float(bptr[i+i+1])
                self.circBuffer[j    ] = x      // Stereo Left
                self.circBuffer[j + 1] = y      // Stereo Right
                j += 2 ; if j >= n { j = 0 }    // Circular buffer looping
                // Microphone Input Analysis
                audioLevelSum += x * x + y * y
            }
        }
        OSMemoryBarrier()   // from libkern/OSAtomic.h
        self.circInIdx = j  // circular index will always be less than size
        if audioLevelSum > 0.0 && count > 0 {
            audioLevel = logf(audioLevelSum / Float(count))
        }
    }
    // set up and activate Audio Session
    func setupAudioSessionForRecording() {
        do {
            let audioSession = AVAudioSession.sharedInstance()

            if (micPermissionGranted == false) {
                if (micPermissionRequested == false) {
                    micPermissionRequested = true
                    audioSession.requestRecordPermission({ (granted: Bool) -> Void in
                        if granted {
                            self.micPermissionGranted = true
                            self.startRecording()
                            return
                        } else {
                            self.enableRecording = false
                            // dispatch in main/UI thread an alert
                            // informing that mic permission is not switched on
                        }
                    })
                }
                return
            }

            if enableRecording {
                try audioSession.setCategory(AVAudioSession.Category.record)
            }
            let preferredIOBufferDuration = 0.0053  // 5.3 milliseconds = 256 samples
            try audioSession.setPreferredSampleRate(sampleRate)     // at 48000.0
            try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

            NotificationCenter.default.addObserver(
                forName: AVAudioSession.interruptionNotification,
                object: nil,
                queue: nil,
                using: myAudioSessionInterruptionHandler)

            try audioSession.setActive(true)
            audioSessionActive = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }

    // find and set up the sample format for the RemoteIO Audio Unit
    private func setupRemoteIOAudioUnitForRecord(audioFormat: AVAudioFormat) {

        do {
            let audioComponentDescription = AudioComponentDescription(
                componentType: kAudioUnitType_Output,
                componentSubType: kAudioUnitSubType_RemoteIO,
                componentManufacturer: kAudioUnitManufacturer_Apple,
                componentFlags: 0,
                componentFlagsMask: 0)

            auAudioUnit = try AUAudioUnit(componentDescription: audioComponentDescription)

            // bus 1 is for data that the microphone exports out to the handler block
            let bus1 = auAudioUnit.outputBusses[1]

            try bus1.setFormat(audioFormat)     // for microphone bus
            audioSetupComplete = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }

    private func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interuptionDict = notification.userInfo
        if let interuptionType = interuptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interuptionVal = AVAudioSession.InterruptionType(
                rawValue: (interuptionType as AnyObject).uintValue)
            if (interuptionVal == AVAudioSession.InterruptionType.began) {
                // [self beginInterruption];
                if (isRecording) {
                    auAudioUnit.stopHardware()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        audioSessionActive = false
                    } catch {
                        // placeholder for error handling
                    }
                    audioInterrupted = true
                }
            } else if (interuptionVal == AVAudioSession.InterruptionType.ended) {
                // [self endInterruption];
                if (audioInterrupted) {
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(true)
                        audioSessionActive = true
                        if (auAudioUnit.renderResourcesAllocated == false) {
                            try auAudioUnit.allocateRenderResources()
                        }
                        try auAudioUnit.startHardware()
                        isRecording = true
                    } catch {
                        // placeholder for error handling
                    }
                }
            }
        }
    }
}   // end of RecordAudio class
// eof
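
For readers asking below how to use these classes and read samples back out, here is a minimal usage sketch (not part of the original gist); RecorderViewController, lastReadIdx and readNewSamples are hypothetical names introduced only for illustration.

import UIKit

// Sketch: hold a single RecordAudio instance and pull new samples out of its circular buffer.
class RecorderViewController: UIViewController {

    let recorder = RecordAudio()        // keep one long-lived instance
    var lastReadIdx = 0                 // this consumer's read index into the circular buffer

    override func viewDidLoad() {
        super.viewDidLoad()
        recorder.startAudioSession()    // requests mic permission and configures the session
    }

    @IBAction func recordTapped(_ sender: Any) {
        recorder.startRecording()       // safe to call again once permission has been granted
    }

    // Copy any samples written since the last call (interleaved L/R Float values).
    func readNewSamples() -> [Float] {
        var samples = [Float]()
        let writeIdx = recorder.circInIdx                   // snapshot of the producer index
        var i = lastReadIdx
        while i != writeIdx {
            samples.append(recorder.circBuffer[i])
            i += 1
            if i >= recorder.circBuffSize { i = 0 }
        }
        lastReadIdx = i
        return samples
    }
}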
How can I get the waveform data? In processMicrophoneBuffer, what does dataArray mean? Does it represent amplitude? And what is a circular buffer?
Questions about generic audio data types and circular buffers/fifos (etc.) are better asked over on stackoverflow.
Thank you for this example!
How do I store the recording to a file?
Hi, I'm curious about the move away from the Audio Unit v3 API/AUAudioUnit to CoreAudio in revision https://gist.github.com/hotpaw2/ba815fc23b5d642705f2b1dedfaf0107/1193de873b813a3f74d7ff4deba11d159c9edf43
I'm playing with RemoteIO/AUVoiceIO with AUAudioUnit right now and was wondering if you hit a roadblock of some sort?
Shouldn't the streamFormatDesc init say MemoryLayout<Float32>.size instead of MemoryLayout<UInt32>.size?
Yes. MemoryLayout<Float32>.size is correct. The current code depends on Float32 and UInt32 being the same size, which isn't good practice.
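
For clarity, a small sketch showing the byte-count fields derived from the sample type (the listing above has been adjusted accordingly); the names and values here are illustrative only, not part of the original gist.

import AudioToolbox

// Illustration: derive the ASBD byte counts from the sample type in one place.
let bytesPerSample = UInt32(MemoryLayout<Float32>.size)   // 4 bytes per 32-bit float sample
let channelCount: UInt32 = 2                              // interleaved stereo
var exampleASBD = AudioStreamBasicDescription(
    mSampleRate:        48000.0,
    mFormatID:          kAudioFormatLinearPCM,
    mFormatFlags:       kAudioFormatFlagsNativeFloatPacked,
    mBytesPerPacket:    channelCount * bytesPerSample,     // one interleaved frame per packet
    mFramesPerPacket:   1,
    mBytesPerFrame:     channelCount * bytesPerSample,
    mChannelsPerFrame:  channelCount,
    mBitsPerChannel:    8 * bytesPerSample,
    mReserved:          0)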
How can I get beat and pitch values from this function?
func processMicrophoneBuffer( // process RemoteIO Buffer from mic input
    inputDataList: UnsafeMutablePointer<AudioBufferList>,
    frameCount: UInt32 )
Please help me.
audioSession.setCategory(AVAudioSession.Category.record) is not giving a callback in outputProvider. When I change it to audioSession.setCategory(AVAudioSession.Category.playAndRecord), it gives error -50 in the renderBlock inside the output provider. I am using version 2 and trying to get an 8 kHz sample rate and 20 milliseconds of data. Please help.
Replying to the question above about the move away from AUAudioUnit: I haven't yet figured out why (it's not a priority for me currently), but the obvious reason could be that it simply doesn't work. The added RecordAudio_v2 class is dead, though no explicit errors are thrown.
I think there is a misunderstanding behind the class, and Apple's docs on the matter aren't good.
I'll try to clarify what the issue is and will update if I succeed.
OK, I have figured it out. The reason RecordAudio_v2 wasn't working is that it incorrectly used the AUAudioUnit.outputProvider property. That property is used for playback on an IO unit. To record microphone data, one needs to set the inputHandler instead, and in that handler create an AudioBufferList to feed into the cached renderBlock, which actually delivers the sound samples (a brief sketch of this approach follows below).
A fixed revision is at https://gist.github.com/leonid-s-usov/dcd674b0a8baf96123cac6c4e08e3e0c
- changed to mono input, but made the processing independent of the actual channel count
- fixed RecordAudio_v2 so that it uses the new AUAudioUnit interface
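
To illustrate that description, here is a minimal, untested sketch of the inputHandler approach. It assumes an AUAudioUnit already configured like RecordAudio_v2 (RemoteIO, isInputEnabled = true, format set on outputBusses[1]); installInputHandler, bytesPerFrame and process are hypothetical names introduced only for this example, not part of either gist.

import AVFoundation
import AudioToolbox

// Sketch only: record by setting AUAudioUnit.inputHandler instead of outputProvider.
func installInputHandler(on auAudioUnit: AUAudioUnit,
                         renderBlock: @escaping AURenderBlock,
                         bytesPerFrame: UInt32,
                         process: @escaping (UnsafeMutablePointer<AudioBufferList>, UInt32) -> Void) {
    auAudioUnit.inputHandler = { (actionFlags, timestamp, frameCount, inputBusNumber) in
        // Build an AudioBufferList whose mData is nil, so the render block can
        // point it at (or copy into) the unit's own internal buffer.
        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: 1,
                mDataByteSize: frameCount * bytesPerFrame,
                mData: nil))
        // Pull the freshly captured microphone samples through the cached render block.
        let err = renderBlock(actionFlags,
                              timestamp,
                              frameCount,
                              inputBusNumber,
                              &bufferList,
                              nil)      // no pull-input block is needed for the input bus
        if err == noErr {
            process(&bufferList, frameCount)    // e.g. copy into the circular buffer
        }
    }
}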
Just checking this out for the first time... gTmp0 is undefined?
Yeah, just add that var, or check my revision linked above.
Thanks, it's working fine. I wanted to ask how to consume data from the circular buffer every 20 ms, where 160 is the frame count, i.e. 8000 Hz with a buffer duration of 20 ms.
Answering your question is hard without context. Let me align some concepts here and then have you clarify your query.
A circular buffer is needed to decouple the consumer and the producer while keeping the memory footprint at a minimum and avoiding reallocations.
For your system to work stably, your overall consumption rate should be greater than or equal to the production rate. Otherwise, you'll eventually run out of space, and a circular buffer won't help.
However, with the help of the circular buffer, you can have your consumer and your producer work in different chunk sizes.
For example, the audio hardware would "bring" you new data in chunks of 160 frames 50 times a second, while your processing could take that data once a second and use all 8000 frames.
You could also reverse the roles and have the audio hardware deliver data in bigger chunks, while your processing logic takes small portions from the buffer frequently.
In either case, your buffer size should be proportional to your sample rate multiplied by the longest period of inactivity of either producer or consumer.
Finally, a circular buffer also lets you perform overlapping analysis, where you consume, say, 800 frames at once but only discard 400 for the next iteration. This gives you a next packet of 800 frames in which half of the frames were part of the previous packet. Such a consumer has to run at double the rate of a non-overlapping one to keep up with the same producer.
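
As a concrete illustration of the fixed-size consumption described above, here is a small sketch that drains whole packets from the RecordAudio_v2-style circular buffer; consumeAvailablePackets, packetSize and the consumer closure are hypothetical names for this example. Note that packetSize counts buffer samples, so for interleaved stereo 160 frames occupy 320 Float slots.

// Sketch: drain fixed-size packets from the circular buffer filled by the recorder.
func consumeAvailablePackets(from recorder: RecordAudio_v2,
                             packetSize: Int,                  // in buffer samples, not frames
                             consumer: ([Float]) -> Void) {
    let size = recorder.circBuffSize
    let writeIdx = recorder.circInIdx          // producer position (snapshot)
    var readIdx = recorder.circOutIdx          // consumer position
    var available = writeIdx - readIdx
    if available < 0 { available += size }     // the write index has wrapped around
    while available >= packetSize {
        var packet = [Float](repeating: 0, count: packetSize)
        for k in 0..<packetSize {
            packet[k] = recorder.circBuffer[readIdx]
            readIdx += 1
            if readIdx >= size { readIdx = 0 }
        }
        consumer(packet)                       // e.g. hand the packet to an encoder
        available -= packetSize
    }
    recorder.circOutIdx = readIdx              // publish the new read position
}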
I need to encode data as AMR and send it every 20 ms at a sample rate of 8000 Hz (8000 * 0.020 = 160 frames). I am building a VoIP calling app using an IMS stack which needs data every 20 ms. We have our own stack; I only have to feed it voice data and get the other side's voice data (every 20 ms, to avoid delay). The packet size is 160 shorts, AMR encoded. I have the AMR codec; I need to pass data to the encoder and get AMR-encoded data in return. Receiving works the same way: get data every 20 ms, decode it, and play it to the speaker.
I think we need some queue-like array logic which holds these 160 shorts as one element. We append data after the circular buffer fills. Inside there will be an iteration with a condition: if the amount of unread data is greater than 160, append; otherwise append in the next render callback. I will have a timer to take data from the array every 20 ms and then encode and send. The only problem is how to consume 160 frames from the circular buffer into a data object and append it to the array. Correct me if my assumption is wrong; help me if there is a solution.
One more question: I am using AVAudioCommonFormat.pcmFormatInt16 for the audio format. Do I need to use the Float32 type in the circular buffer array?
In your case, the circular buffer might only be needed to guarantee the packet size of 160 frames. Although you would most probably get the same number of samples from the system AU callback, you can't rely on that, and if your downstream logic requires this exact size for every call you will be safer with the buffer.
Please note that you may well be able to push the data into your encoder directly. Usually, encoders have input buffers which can deal with slight deviations in the input packet sizes. Please consult your documentation.
Set your desired sample format as per your encoder and apply it to the AudioUnit input bus, then set your desired sample rate and IO buffer duration on the AudioSession. This is the key point if you would like to avoid creating a separate timing subsystem to follow the 20 ms requirement: you can ask the audio session to deliver input once every 20 ms and use its callback to perform your encoding.
try audioSession.setPreferredSampleRate(8000)
try audioSession.setPreferredIOBufferDuration(0.02)
Having done this, the recordMicrophoneInputSamples function will be called 50 times a second.
If you need to, you may copy the data into the circular buffer and then immediately consume it from the buffer. This requires a buffer size of no more than 2x160 frames (remember that a frame can be stereo, so do your math).
You can change the type of the buffer samples to the one you need at line 33 (or 332).
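
A sketch of that configuration, assuming a RecordAudio_v2-style AUAudioUnit; configureForNarrowbandCapture is a hypothetical helper name, and the preferred values are only requests that the hardware may override (as discussed just below).

import AVFoundation
import AudioToolbox

// Sketch: ask the session for ~20 ms buffers and put an 8 kHz Int16 format on the mic bus.
func configureForNarrowbandCapture(_ auAudioUnit: AUAudioUnit) throws {
    let audioSession = AVAudioSession.sharedInstance()
    try audioSession.setCategory(AVAudioSession.Category.record)
    try audioSession.setPreferredSampleRate(8000)            // preference only
    try audioSession.setPreferredIOBufferDuration(0.02)      // ~20 ms, i.e. 160 frames at 8 kHz
    try audioSession.setActive(true)

    // Int16 mono at 8 kHz on the microphone bus (bus 1 of the RemoteIO unit);
    // CoreAudio resamples from the actual hardware rate if necessary.
    if let format = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                  sampleRate: 8000.0,
                                  channels: 1,
                                  interleaved: true) {
        try auAudioUnit.outputBusses[1].setFormat(format)
    }
    auAudioUnit.isInputEnabled = true
}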
Thanks for your reply. Setting setPreferredSampleRate and setPreferredIOBufferDuration does not give a callback at those values across different device architectures; after setting the preferred values and debugging, you can see that. For that reason I wanted a timer implementation for the 20 ms (or whatever is required). That's why I implemented a queue which delivers 20 ms of data for consumption; data is appended after recordMicrophoneInputSamples. This queue holds encoded data. The encoder logic, as you said, has its own logic for sampling it into 320. I was wondering whether my approach of implementing a timer is right or wrong; if I am wrong, please correct me.
OK, you are right. Checking this indeed shows that iPhone hardware in my case sets the actual sampling rate to 16000 if I ask it for 8000. Well, that's according to the docs, as we are just setting the preferred values.
However, we can still rely on the CoreAudio to deliver us a resampled frame stream. Even though the sampling rate of the AudioSession (i.e. the hardware sampling rate) is 16000, we can ask for the audio format on the input bus to be sampled at 8000.
(lldb) po audioSession.ioBufferDuration
0.00800000037997961
(lldb) po audioSession.sampleRate
16000.0
(lldb) po audioFormat.sampleRate
8000.0
Above you can see that the system has not accepted my requested 0.02 ioBufferDuration, and also has set the sampling rate to 16k instead of 8. But the audio format I'm setting on the AudioUnit has 8k rate, and here is my callback being called:
(lldb) po frameCount
64
Let's do the math. 16000 * 0.008 = 128. This is the expected number of frames with the system buffer duration and 16k sample rate. However, we are receiving exactly 1/2 of that, which means that CoreAudio has resampled the input for us.
Good. Now we only have to collect 160 samples and push them downstream.
First of all, we will need our buffer. If you say that your codec doesn't have an input buffer then we'll have to use our circular buffer.
Secondly, you could go and implement your parallel timer logic which fires exactly once every 20 ms and consumes the data from our buffer. That can't be "wrong" as long as it works. There may be cases when such a solution is preferred (for example, if your encoding takes too much time).
But I say you might not need that. Every time you receive a callback with an additional 64 frames you first push them into the buffer and then check if your buffer has now 160 or more frames. If yes, then immediately call downstream, synchronously, consuming just the 160 frames and leaving the extra frames in the buffer for the next cycle.
If you are concerned about calling the downstream synchronously, you can always do that with an async block scheduled for another queue. Just make sure that your circular buffer is thread-safe then. Technically I'd say, give the synchronous solution a go first.
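
A minimal sketch of that synchronous approach; PacketAssembler, push and encode are hypothetical names introduced for this example, and a production version would use a preallocated ring buffer rather than a growable array.

// Sketch: on every input callback, append the new frames and immediately drain
// complete 160-frame packets, leaving any remainder for the next cycle.
final class PacketAssembler {
    private var pending = [Int16]()        // simple FIFO for clarity; not reallocation-free
    private let packetFrames = 160         // 20 ms at 8 kHz, mono

    // Call from the audio input callback with the freshly captured samples.
    func push(_ newSamples: [Int16], encode: ([Int16]) -> Void) {
        pending.append(contentsOf: newSamples)
        while pending.count >= packetFrames {
            let packet = Array(pending.prefix(packetFrames))
            pending.removeFirst(packetFrames)
            encode(packet)                 // e.g. hand the 160 shorts to the AMR encoder
        }
    }
}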
Why is circBuffer a Float type in your code? If I try changing circBuffer from Float to Int32, it crashes at line number 443 (this link: https://gist.github.com/leonid-s-usov/dcd674b0a8baf96123cac6c4e08e3e0c ), sum += Float32(x*x). The AMR codec converts internally into a char type and then into a short array of size 160. What type do we have to send from circBuffer? If I select a size of 160, the total is 640 bytes, but the codec does a 320-byte conversion and appends data to an NSMutableData with [out appendBytes:amrFrame length:byte_counter]. After the iteration it returns the total AMR data.
// See this snippet
NSData *EncodePCMToAMR(char *data, int maxLen, int nChannels, int nBitsPerSample) {
    char *oldBuf = data;
    /* input speech vector */
    short speech[160];
    /* counters */
    int byte_counter, frames = 0, bytes = 0;
}
One more question: how do I convert this circBuffer array to PCM data, since my codec supports PCM to AMR?
@manishlnmcbm, with all respect, I would like to limit this thread here, as it looks like it is departing too far from the topic of this gist.
First of all, I am not taking any credit for the code; it's not mine. I've used it as boilerplate, changed some parts, and fixed some minor issues, which I have submitted to the public via my account.
Regarding your questions, I believe I've given enough information about using the new version 3 AudioUnit API to record real-time audio and feed it to downstream coders, including the suggestion to change the type of the circular buffer to address your requirements.
If you encounter problems refactoring the working code from Float32 to another type, then I suggest you turn to Stack Overflow or another educational platform for help with the more basic programming concepts.
Best regards.
Hi, I am looking for some Swift code to record audio and stream it to a server; however, I am still new to this. Can this code prepare the recorded audio buffer as binary PCM/WAV data to send to a server? If so, how? Would you explain it? That would help. Thanks.
I'm encountering the error IPCAUClient: can't connect to server (-66748) upon calling startRecording() and haven't yet found any solutions.
It looks like this error is triggered within setupRemoteIOAudioUnitForRecord(audioFormat : AVAudioFormat) from let bus1 = auAudioUnit.outputBusses[1]. Any suggestions?
The outputProvider block isn't getting called and I have a hunch it's related to this.
There are periodic gaps in the recorded audio, one for every AudioUnit render. I don't think it's a problem with the indexing in processMicrophoneBuffer, and inTimeStamp is incrementing perfectly with respect to frameCount. The size of the gaps is related to the frames per slice (set indirectly through preferredIOBufferDuration). Is this a latency issue? Any insight would be appreciated.
Thank you very much!
Thank you for this example! This plus aubio (https://github.com/aubio/aubio) helps me in an app I am developing to process onsets from the iPhone mic.