Converting AVAudioPCMBuffer to another AVAudioPCMBuffer - avaudioengine

I am trying to convert a determined AVAudioPCMBuffer (44.1khz, 1ch, float32, not interleaved) to another AVAudioPCMBuffer (16khz, 1ch, int16, not interleaved) using AVAudioConverter and write it using AVAudioFile.
My code uses the library AudioKit together with the tap AKLazyTap to get a buffer each determined time, based on this source:
https://github.com/AudioKit/AudioKit/tree/master/AudioKit/Common/Taps/Lazy%20Tap
Here is my implementation:
lazy var downAudioFormat: AVAudioFormat = {
let avAudioChannelLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
return AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: 16000,
interleaved: false,
channelLayout: avAudioChannelLayout)
}()
//...
AKSettings.sampleRate = 44100
AKSettings.numberOfChannels = AVAudioChannelCount(1)
AKSettings.ioBufferDuration = 0.002
AKSettings.defaultToSpeaker = true
//...
let mic = AKMicrophone()
let originalAudioFormat: AVAudioFormat = mic.avAudioNode.outputFormat(forBus: 0) //41.100, 1ch, float32...
let inputFrameCapacity = AVAudioFrameCount(1024)
//I don't think this is correct, the audio is getting chopped...
//How to calculate it correctly?
let outputFrameCapacity = AVAudioFrameCount(512)
guard let inputBuffer = AVAudioPCMBuffer(
pcmFormat: originalAudioFormat,
frameCapacity: inputFrameCapacity) else {
fatalError()
}
// Your timer should fire equal to or faster than your buffer duration
bufferTimer = Timer.scheduledTimer(
withTimeInterval: AKSettings.ioBufferDuration/2,
repeats: true) { [weak self] _ in
guard let unwrappedSelf = self else {
return
}
unwrappedSelf.lazyTap?.fillNextBuffer(inputBuffer, timeStamp: nil)
// This is important, since we're polling for samples, sometimes
//it's empty, and sometimes it will be double what it was the last call.
if inputBuffer.frameLength == 0 {
return
}
//This converter is only create once, as the AVAudioFile. Ignore this code I call a function instead.
let converter = AVAudioConverter(from: originalAudioFormat, to: downAudioFormat)
converter.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Normal
converter.sampleRateConverterQuality = .min
converter.bitRateStrategy = AVAudioBitRateStrategy_Constant
guard let outputBuffer = AVAudioPCMBuffer(
pcmFormat: converter.outputFormat,
frameCapacity: outputFrameCapacity) else {
print("Failed to create new buffer")
return
}
let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return inputBuffer
}
var error: NSError?
let status: AVAudioConverterOutputStatus = converter.convert(
to: outputBuffer,
error: &error,
withInputFrom: inputBlock)
switch status {
case .error:
if let unwrappedError: NSError = error {
print(unwrappedError)
}
return
default: break
}
//Only created once, instead of this code my code uses a function to verify if the AVAudioFile has been created, ignore it.
outputAVAudioFile = try AVAudioFile(
forWriting: unwrappedCacheFilePath,
settings: format.settings,
commonFormat: format.commonFormat,
interleaved: false)
do {
try outputAVAudioFile?.write(from: avAudioPCMBuffer)
} catch {
print(error)
}
}
(Please note that AVAudioConverter and AVAudioFile are being reused, the initialization there doesn't represent the real implementation on my code, just to simplify and make it more simple to understand.)
With frameCapacity on the outputBuffer: AVAudioPCMBuffer set to 512, the audio get chopped. Is there any way to discovery the correct frameCapacity for this buffer?
Written using Swift 4 and AudioKit 4.1.
Many thanks!

I managed to solve this problem installing a Tap on the inputNode like this:
lazy var downAudioFormat: AVAudioFormat = {
let avAudioChannelLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
return AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: SAMPLE_RATE,
interleaved: true,
channelLayout: avAudioChannelLayout)
}()
private func addBufferListener(_ avAudioNode: AVAudioNode) {
let originalAudioFormat: AVAudioFormat = avAudioNode.inputFormat(forBus: 0)
let downSampleRate: Double = downAudioFormat.sampleRate
let ratio: Float = Float(originalAudioFormat.sampleRate)/Float(downSampleRate)
let converter: AVAudioConverter = buildConverter(originalAudioFormat)
avAudioNode.installTap(
onBus: 0,
bufferSize: AVAudioFrameCount(downSampleRate * 2),
format: originalAudioFormat,
block: { (buffer: AVAudioPCMBuffer!, _ : AVAudioTime!) -> Void in
let capacity = UInt32(Float(buffer.frameCapacity)/ratio)
guard let outputBuffer = AVAudioPCMBuffer(
pcmFormat: self.downAudioFormat,
frameCapacity: capacity) else {
print("Failed to create new buffer")
return
}
let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer
}
var error: NSError?
let status: AVAudioConverterOutputStatus = converter.convert(
to: outputBuffer,
error: &error,
withInputFrom: inputBlock)
switch status {
case .error:
if let unwrappedError: NSError = error {
print("Error \(unwrappedError)"))
}
return
default: break
}
self.delegate?.flushAudioBuffer(outputBuffer)
})
}

Related

How can I create a spectrogram from an audio file?

I have tried to create a spectrogram using this apple tutorial but it uses live audio input from the microphone. I want to create one from an existing file. I have tried to convert apples example from live input to existing files with no luck, so I am wondering if there are any better resources out there.
Here is how I am getting the samples:
let samples: (naturalTimeScale: Int32, data: [Float]) = {
guard let samples = AudioUtilities.getAudioSamples(
forResource: resource,
withExtension: wExtension) else {
fatalError("Unable to parse the audio resource.")
}
return samples
}()
// Returns an array of single-precision values for the specified audio resource.
static func getAudioSamples(forResource: String,
withExtension: String) -> (naturalTimeScale: CMTimeScale,
data: [Float])? {
guard let path = Bundle.main.url(forResource: forResource,
withExtension: withExtension) else {
return nil
}
let asset = AVAsset(url: path.absoluteURL)
guard
let reader = try? AVAssetReader(asset: asset),
let track = asset.tracks.first else {
return nil
}
let outputSettings: [String: Int] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVNumberOfChannelsKey: 1,
AVLinearPCMIsBigEndianKey: 0,
AVLinearPCMIsFloatKey: 1,
AVLinearPCMBitDepthKey: 32,
AVLinearPCMIsNonInterleaved: 1
]
let output = AVAssetReaderTrackOutput(track: track,
outputSettings: outputSettings)
reader.add(output)
reader.startReading()
var samplesData = [Float]()
while reader.status == .reading {
if
let sampleBuffer = output.copyNextSampleBuffer(),
let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) {
let bufferLength = CMBlockBufferGetDataLength(dataBuffer)
var data = [Float](repeating: 0,
count: bufferLength / 4)
CMBlockBufferCopyDataBytes(dataBuffer,
atOffset: 0,
dataLength: bufferLength,
destination: &data)
samplesData.append(contentsOf: data)
}
}
return (naturalTimeScale: track.naturalTimeScale, data: samplesData)
}
And here is how I am performing the "fft" or dct in this case:
static var sampleCount = 1024
let forwardDCT = vDSP.DCT(count: sampleCount,
transformType: .II)
guard let freqs = forwardDCT?.transform(samples.data) else { return }
This is the part where I begin to get lost/stuck in the apple tutorial. How can I create the spectrogram from here?

Stream Microphone Audio from one device to another using Multipeer connectivy and EZAudio

[TLDR: Receiving an ASSERTION FAILURE on CABufferList.h (find error at the bottom) when trying to save streamed audio data]
I am having trouble saving microphone audio that is streamed between devices using Multipeer Connectivity. So far I have two devices connected to each other using Multipeer Connectivity and have them sending messages and streams to each other.
Finally I have the StreamDelegate method
func stream(_ aStream: Stream, handle eventCode: Stream.Event) {
// create a buffer for capturing the inputstream data
let bufferSize = 2048
let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
defer {
buffer.deallocate()
}
var audioBuffer: AudioBuffer!
var audioBufferList: AudioBufferList!
switch eventCode {
case .hasBytesAvailable:
// if the input stream has bytes available
// return the actual number of bytes placed in the buffer;
let read = self.inputStream.read(buffer, maxLength: bufferSize)
if read < 0 {
//Stream error occured
print(self.inputStream.streamError!)
} else if read == 0 {
//EOF
break
}
guard let mData = UnsafeMutableRawPointer(buffer) else { return }
audioBuffer = AudioBuffer(mNumberChannels: 1, mDataByteSize: UInt32(read), mData: mData)
audioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: audioBuffer)
let audioBufferListPointer = UnsafeMutablePointer<AudioBufferList>.allocate(capacity: read)
audioBufferListPointer.pointee = audioBufferList
DispatchQueue.main.async {
if self.ezRecorder == nil {
self.recordAudio()
}
self.ezRecorder?.appendData(from: audioBufferListPointer, withBufferSize: UInt32(read))
}
print("hasBytesAvailable \(audioBuffer!)")
case .endEncountered:
print("endEncountered")
if self.inputStream != nil {
self.inputStream.delegate = nil
self.inputStream.remove(from: .current, forMode: .default)
self.inputStream.close()
self.inputStream = nil
}
case .errorOccurred:
print("errorOccurred")
case .hasSpaceAvailable:
print("hasSpaceAvailable")
case .openCompleted:
print("openCompleted")
default:
break
}
}
I am getting the stream of data however when I try to save it as an audio file using EZRecorder, I get the following error message
[default] CABufferList.h:184 ASSERTION FAILURE [(nBytes <= buf->mDataByteSize) != 0 is false]:
I suspect the error could be arising when I create AudioStreamBasicDescription for EZRecorder.
I understand there may be other errors here and I appreciate any suggestions to solve the bug and improve the code. Thanks
EZAudio comes with TPCircularBuffer - use that.
Because writing the buffer to file is an async operation, this becomes a great use case for a circular buffer where we have one producer and one consumer.
Use the EZAudioUtilities where possible.
Update: EZRecorder write expects bufferSize to be number of frames to write and not bytes
So something like this should work:
class StreamDelegateInstance: NSObject {
private static let MaxReadSize = 2048
private static let BufferSize = MaxReadSize * 4
private var availableReadBytesPtr = UnsafeMutablePointer<Int32>.allocate(capacity: 1)
private var availableWriteBytesPtr = UnsafeMutablePointer<Int32>.allocate(capacity: 1)
private var ezRecorder: EZRecorder?
private var buffer = UnsafeMutablePointer<TPCircularBuffer>.allocate(capacity: 1)
private var inputStream: InputStream?
init(inputStream: InputStream? = nil) {
self.inputStream = inputStream
super.init()
EZAudioUtilities.circularBuffer(buffer, withSize: Int32(StreamDelegateInstance.BufferSize))
ensureWriteStream()
}
deinit {
EZAudioUtilities.freeCircularBuffer(buffer)
buffer.deallocate()
availableReadBytesPtr.deallocate()
availableWriteBytesPtr.deallocate()
self.ezRecorder?.closeAudioFile()
self.ezRecorder = nil
}
private func ensureWriteStream() {
guard self.ezRecorder == nil else { return }
// stores audio to temporary folder
let audioOutputPath = NSTemporaryDirectory() + "audioOutput2.aiff"
let audioOutputURL = URL(fileURLWithPath: audioOutputPath)
print(audioOutputURL)
// let audioStreamBasicDescription = AudioStreamBasicDescription(mSampleRate: 44100.0, mFormatID: kAudioFormatLinearPCM, mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked, mBytesPerPacket: 4, mFramesPerPacket: 1, mBytesPerFrame: 4, mChannelsPerFrame: 1, mBitsPerChannel: 32, mReserved: 1081729024)
// EZAudioUtilities.audioBufferList(withNumberOfFrames: <#T##UInt32#>,
// numberOfChannels: 1,
// interleaved: true)
// if you don't need a custom format, consider using EZAudioUtilities.m4AFormat
let format = EZAudioUtilities.aiffFormat(withNumberOfChannels: 1,
sampleRate: 44800)
self.ezRecorder = EZRecorder.init(url: audioOutputURL,
clientFormat: format,
fileType: .AIFF)
}
private func writeStream() {
let ptr = TPCircularBufferTail(buffer, availableWriteBytesPtr)
// ensure we have non 0 bytes to write - which should always be true, but you may want to refactor things
guard availableWriteBytesPtr.pointee > 0 else { return }
let framesToWrite = availableWriteBytesPtr.pointee / 4 // sizeof(float)
let audioBuffer = AudioBuffer(mNumberChannels: 1,
mDataByteSize: UInt32(availableWriteBytesPtr.pointee),
mData: ptr)
let audioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: audioBuffer)
self.ezRecorder?.appendData(from: &audioBufferList,
withBufferSize: UInt32(framesToWrite))
TPCircularBufferConsume(buffer, framesToWrite * 4)
}
}
extension StreamDelegateInstance: StreamDelegate {
func stream(_ aStream: Stream, handle eventCode: Stream.Event) {
switch eventCode {
case .hasBytesAvailable:
// if the input stream has bytes available
// return the actual number of bytes placed in the buffer;
guard let ptr = TPCircularBufferHead(buffer, availableReadBytesPtr) else {
print("couldn't get buffer ptr")
break;
}
let bytedsToRead = min(Int(availableReadBytesPtr.pointee), StreamDelegateInstance.MaxReadSize)
let mutablePtr = ptr.bindMemory(to: UInt8.self, capacity: Int(bytedsToRead))
let bytesRead = self.inputStream?.read(mutablePtr,
maxLength: bytedsToRead) ?? 0
if bytesRead < 0 {
//Stream error occured
print(self.inputStream?.streamError! ?? "No bytes read")
break
} else if bytesRead == 0 {
//EOF
break
}
TPCircularBufferProduce(buffer, Int32(bytesRead))
DispatchQueue.main.async { [weak self] in
self?.writeStream()
}
case .endEncountered:
print("endEncountered")
if self.inputStream != nil {
self.inputStream?.delegate = nil
self.inputStream?.remove(from: .current, forMode: .default)
self.inputStream?.close()
self.inputStream = nil
}
case .errorOccurred:
print("errorOccurred")
case .hasSpaceAvailable:
print("hasSpaceAvailable")
case .openCompleted:
print("openCompleted")
default:
break
}
}
}

Low Pass filter + sample rate conversion using Avaudioengine iOS

We are working on a project which allows us to record some sounds from a microphone with a 5k Hz sample rate with some Low-Pass filter & HighPass filter.
What we are using
We are using AvaudioEngine for this purpose.
We are using AVAudioConverter for downgrading the sample rate.
We are using AVAudioUnitEQ for the LowPass & HighPass filter.
Code
let bus = 0
let inputNode = engine.inputNode
let equalizer = AVAudioUnitEQ(numberOfBands: 2)
equalizer.bands[0].filterType = .lowPass
equalizer.bands[0].frequency = 3000
equalizer.bands[0].bypass = false
equalizer.bands[1].filterType = .highPass
equalizer.bands[1].frequency = 1000
equalizer.bands[1].bypass = false
engine.attach(equalizer) //Attach equalizer
// Connect nodes
engine.connect(inputNode, to: equalizer, format: inputNode.inputFormat(forBus: 0))
engine.connect(equalizer, to: engine.mainMixerNode, format: inputNode.inputFormat(forBus: 0))
engine.connect(engine.mainMixerNode, to: engine.outputNode, format: inputNode.inputFormat(forBus: 0))
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
sampleRate: 5000,
channels: 1,
interleaved: false)!
// Converter to downgrade sample rate
guard let converter: AVAudioConverter = AVAudioConverter(from: inputNode.inputFormat(forBus: 0), to: outputFormat) else {
print("Can't convert in to this format")
return
}
engine.mainMixerNode.installTap(onBus: bus, bufferSize: 2688, format: engine.mainMixerNode.outputFormat(forBus: 0)) { (buffer, time) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(outputFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
assert(status != .error)
if status == .haveData {
// Process with converted buffer
}
}
engine.prepare()
do {
try engine.start()
} catch {
print("Can't start the engine: \(error)")
}
Issue
low-pass and high-pass filters are not working.
Alternate Approach
To check code is working or not, we have added a reverb effect instead of lowpass filter.
Reverb effect(Using AVAudioUnitReverb) works with same code.
Can anyone help me where are we doing wrong in applying lowpass filter?
I think the main problem with this code was that the AVAudioConverter was being created before calling engine.prepare() which can and will change the mainMixerNode output format. Aside from that, there was a redundant connection of mainMixerNode to outputNode, along with a probably incorrect format - mainMixerNode is documented to be automatically created and connected to the output node "on demand". The tap also did not need a format.
let bus = 0
let inputNode = engine.inputNode
let equalizer = AVAudioUnitEQ(numberOfBands: 2)
equalizer.bands[0].filterType = .lowPass
equalizer.bands[0].frequency = 3000
equalizer.bands[0].bypass = false
equalizer.bands[1].filterType = .highPass
equalizer.bands[1].frequency = 1000
equalizer.bands[1].bypass = false
engine.attach(equalizer) //Attach equalizer
// Connect nodes
engine.connect(inputNode, to: equalizer, format: inputNode.inputFormat(forBus: 0))
engine.connect(equalizer, to: engine.mainMixerNode, format: inputNode.inputFormat(forBus: 0))
// call before creating converter because this changes the mainMixer's output format
engine.prepare()
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
sampleRate: 5000,
channels: 1,
interleaved: false)!
// Downsampling converter
guard let converter: AVAudioConverter = AVAudioConverter(from: engine.mainMixerNode.outputFormat(forBus: 0), to: outputFormat) else {
print("Can't convert in to this format")
return
}
engine.mainMixerNode.installTap(onBus: bus, bufferSize: 2688, format: nil) { (buffer, time) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(outputFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
assert(status != .error)
if status == .haveData {
// Process with converted buffer
}
}
do {
try engine.start()
} catch {
print("Can't start the engine: \(error)")
}
override func viewDidLoad() {
super.viewDidLoad()
lableItem.text = "Select Frequency"
setUpDropdown()
navigationItem.title = "High Pass Filter"
do{
try audioSession.setCategory(.playAndRecord, mode: .default, options: [.mixWithOthers, .defaultToSpeaker,.allowBluetoothA2DP,.allowAirPlay,.allowBluetooth])
try audioSession.setActive(true)
} catch{
print(error.localizedDescription)
}
let bus = 0
let inputNode = engine.inputNode
let equalizer = AVAudioUnitEQ(numberOfBands: 2)
equalizer.bands[0].filterType = .highPass
equalizer.bands[0].frequency = 20000.0
equalizer.bands[0].bypass = false
equalizer.bands[1].filterType = .lowPass
equalizer.bands[1].frequency = 1000.0
equalizer.bands[1].bypass = false
engine.attach(equalizer) //Attach equalizer
// Connect nodes
engine.connect(inputNode, to: equalizer, format: inputNode.inputFormat(forBus: 0))
engine.connect(equalizer, to: engine.mainMixerNode, format: inputNode.inputFormat(forBus: 0))
// call before creating converter because this changes the mainMixer's output format
engine.prepare()
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt32,
sampleRate: 44100,
channels: 1,
interleaved: false)!
// Downsampling converter
guard let converter: AVAudioConverter = AVAudioConverter(from: engine.mainMixerNode.outputFormat(forBus: 0), to: outputFormat) else {
print("Can't convert in to this format")
return
}
engine.mainMixerNode.installTap(onBus: bus, bufferSize: 2688, format: nil) { (buffer, time) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(outputFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
assert(status != .error)
if status == .haveData {
// Process with converted buffer
}
}
do {
try engine.start()
} catch {
print("Can't start the engine: \(error)")
}
}

How can mp3 data in memory be loaded into an AVAudioPCMBuffer in Swift?

I have a class method to read an mp3 file into an AVAudioPCMBuffer as follows:
private(set) var fullAudio: AVAudioPCMBuffer?
func initAudio(audioFileURL: URL) -> Bool {
var status = true
do {
let audioFile = try AVAudioFile(forReading: audioFileURL)
let audioFormat = audioFile.processingFormat
let audioFrameLength = UInt32(audioFile.length)
fullAudio = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameLength)
if let fullAudio = fullAudio {
try audioFile.read(into: fullAudio)
// processing of full audio
}
} catch {
status = false
}
return status
}
However, I now need to be able to read the same mp3 info from memory (rather than a file) into the AVAudioPCMBuffer without using the file system, where the info is held in the Data type, for example using a function declaration of the form
func initAudio(audioFileData: Data) -> Bool {
// some code setting up fullAudio
}
How can this be done? I've looked to see whether there is a route from Data holding mp3 info to AVAudioPCMBuffer (e.g. via AVAudioBuffer or AVAudioCompressedBuffer), but haven't seen a way forward.
I went down the rabbit hole on this one. Here is what probably amounts to a Rube Goldberg-esque solution:
A lot of the pain comes from using C from Swift.
func data_AudioFile_ReadProc(_ inClientData: UnsafeMutableRawPointer, _ inPosition: Int64, _ requestCount: UInt32, _ buffer: UnsafeMutableRawPointer, _ actualCount: UnsafeMutablePointer<UInt32>) -> OSStatus {
let data = inClientData.assumingMemoryBound(to: Data.self).pointee
let bufferPointer = UnsafeMutableRawBufferPointer(start: buffer, count: Int(requestCount))
let copied = data.copyBytes(to: bufferPointer, from: Int(inPosition) ..< Int(inPosition) + Int(requestCount))
actualCount.pointee = UInt32(copied)
return noErr
}
func data_AudioFile_GetSizeProc(_ inClientData: UnsafeMutableRawPointer) -> Int64 {
let data = inClientData.assumingMemoryBound(to: Data.self).pointee
return Int64(data.count)
}
extension Data {
func convertedTo(_ format: AVAudioFormat) -> AVAudioPCMBuffer? {
var data = self
var af: AudioFileID? = nil
var status = AudioFileOpenWithCallbacks(&data, data_AudioFile_ReadProc, nil, data_AudioFile_GetSizeProc(_:), nil, 0, &af)
guard status == noErr, af != nil else {
return nil
}
defer {
AudioFileClose(af!)
}
var eaf: ExtAudioFileRef? = nil
status = ExtAudioFileWrapAudioFileID(af!, false, &eaf)
guard status == noErr, eaf != nil else {
return nil
}
defer {
ExtAudioFileDispose(eaf!)
}
var clientFormat = format.streamDescription.pointee
status = ExtAudioFileSetProperty(eaf!, kExtAudioFileProperty_ClientDataFormat, UInt32(MemoryLayout.size(ofValue: clientFormat)), &clientFormat)
guard status == noErr else {
return nil
}
if let channelLayout = format.channelLayout {
var clientChannelLayout = channelLayout.layout.pointee
status = ExtAudioFileSetProperty(eaf!, kExtAudioFileProperty_ClientChannelLayout, UInt32(MemoryLayout.size(ofValue: clientChannelLayout)), &clientChannelLayout)
guard status == noErr else {
return nil
}
}
var frameLength: Int64 = 0
var propertySize: UInt32 = UInt32(MemoryLayout.size(ofValue: frameLength))
status = ExtAudioFileGetProperty(eaf!, kExtAudioFileProperty_FileLengthFrames, &propertySize, &frameLength)
guard status == noErr else {
return nil
}
guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(frameLength)) else {
return nil
}
let bufferSizeFrames = 512
let bufferSizeBytes = Int(format.streamDescription.pointee.mBytesPerFrame) * bufferSizeFrames
let numBuffers = format.isInterleaved ? 1 : Int(format.channelCount)
let numInterleavedChannels = format.isInterleaved ? Int(format.channelCount) : 1
let audioBufferList = AudioBufferList.allocate(maximumBuffers: numBuffers)
for i in 0 ..< numBuffers {
audioBufferList[i] = AudioBuffer(mNumberChannels: UInt32(numInterleavedChannels), mDataByteSize: UInt32(bufferSizeBytes), mData: malloc(bufferSizeBytes))
}
defer {
for buffer in audioBufferList {
free(buffer.mData)
}
free(audioBufferList.unsafeMutablePointer)
}
while true {
var frameCount: UInt32 = UInt32(bufferSizeFrames)
status = ExtAudioFileRead(eaf!, &frameCount, audioBufferList.unsafeMutablePointer)
guard status == noErr else {
return nil
}
if frameCount == 0 {
break
}
let src = audioBufferList
let dst = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList)
if src.count != dst.count {
return nil
}
for i in 0 ..< src.count {
let srcBuf = src[i]
let dstBuf = dst[i]
memcpy(dstBuf.mData?.advanced(by: Int(dstBuf.mDataByteSize)), srcBuf.mData, Int(srcBuf.mDataByteSize))
}
pcmBuffer.frameLength += frameCount
}
return pcmBuffer
}
}
A more robust solution would probably read the sample rate and channel count and give the option to preserve them.
Tested using:
let url = URL(fileURLWithPath: "/tmp/test.mp3")
let data = try! Data(contentsOf: url)
let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: false)!
if let d = data.convertedTo(format) {
let avf = try! AVAudioFile(forWriting: URL(fileURLWithPath: "/tmp/foo.wav"), settings: format.settings, commonFormat: format.commonFormat, interleaved: format.isInterleaved)
try! avf.write(from: d)
}

Using AudioToolbox instead of AVFoundation in SFSpeechRecognizer

I have to use AudioToolbox instead AVAudioSession for providing stream to SFSpeechRecognizer. I know that I should use AudioQueue, so I made an audio recording export to CMSampleBuffer to read it with recognizer. And while debugging I see that the buffer is added to SFSpeechAudioBufferRecognitionRequest, but the code in the task closure doesn't execute: neither result, nor error.
What's wrong with the code?
let NUM_BUFFERS = 1
struct RecordState {
var dataFormat = AudioStreamBasicDescription()
var queue: AudioQueueRef?
var buffers: [AudioQueueBufferRef] = []
var audioFile: AudioFileID?
var currentPacket: Int64 = 0
var recording = false
}
func сallback(_ inUserData: UnsafeMutableRawPointer?,
_ inAQ: AudioQueueRef,
_ inBuffer: AudioQueueBufferRef,
_ inStartTime: UnsafePointer<AudioTimeStamp>,
_ inNumberPacketDescriptions: UInt32,
_ inPacketDescs: UnsafePointer<AudioStreamPacketDescription>?) {
let recordState = inUserData?.assumingMemoryBound(to: RecordState.self)
if let queue = recordState?.pointee.queue {
AudioQueueEnqueueBuffer(queue, inBuffer, 0, nil)
let rec = AudioRecorder.sharedInstance
rec.transformBuffer(pBuffer: inBuffer, pLength: inBuffer.pointee.mAudioDataByteSize)
}
}
class AudioRecorder: NSObject, ObservableObject, SFSpeechRecognizerDelegate {
let format = AudioStreamBasicDescription(mSampleRate: Float64(16000.0), mFormatID: kAudioFormatLinearPCM, mFormatFlags: kAudioFormatFlagsNativeFloatPacked, mBytesPerPacket: UInt32(MemoryLayout<Float32>.size), mFramesPerPacket: 1, mBytesPerFrame: UInt32(MemoryLayout<Float32>.size), mChannelsPerFrame: 1, mBitsPerChannel: UInt32(MemoryLayout<Float32>.size * 8), mReserved: 0)
var recordState = RecordState()
var startTime = CFAbsoluteTimeGetCurrent()
static var sharedInstance = AudioRecorder()
private var speechRecognizer = SFSpeechRecognizer()!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var engineEnabled = false
private var lastText = [SFTranscriptionSegment]()
override init() {
super.init()
OperationQueue.main.addOperation {
SFSpeechRecognizer.requestAuthorization { authStatus in
switch authStatus {
case .authorized:
self.engineEnabled = true
default:
self.engineEnabled = false
}
}
}
self.speechRecognizer.delegate = self
}
func startRecording() {
recordState.dataFormat = format
var queue: AudioQueueRef?
if AudioQueueNewInput(&recordState.dataFormat, сallback, &recordState, CFRunLoopGetCurrent(), CFRunLoopMode.commonModes.rawValue, 0, &queue) == noErr {
recordState.queue = queue
} else {
return
}
for _ in 0..<NUM_BUFFERS {
var buffer: AudioQueueBufferRef?
if AudioQueueAllocateBuffer(queue!, 1024, &buffer) == noErr {
recordState.buffers.append(buffer!)
}
AudioQueueEnqueueBuffer(queue!, buffer!, 0, nil)
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
recognitionRequest.shouldReportPartialResults = true
// Keep speech recognition data on device
if #available(iOS 13, *) {
recognitionRequest.requiresOnDeviceRecognition = true
}
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
print(result.bestTranscription.formattedString)
isFinal = result.isFinal
}
if error != nil || isFinal {
// Stop recognizing speech if there is a problem.
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
recordState.recording = true
if AudioQueueStart(recordState.queue!, nil) != noErr {
fatalError("Something is wrong")
}
self.startTime = CFAbsoluteTimeGetCurrent()
}
func stopRecording() {
recordState.recording = false
AudioQueueStop(recordState.queue!, true)
for i in 0..<NUM_BUFFERS {
if let buffers = recordState.buffers[i] as? AudioQueueBufferRef {
AudioQueueFreeBuffer(recordState.queue!, buffers)
}
}
AudioQueueDispose(recordState.queue!, true)
if let file = recordState.audioFile {
AudioFileClose(file)
}
}
func transformBuffer(pBuffer: AudioQueueBufferRef, pLength: UInt32) {
var blockBuffer: CMBlockBuffer?
CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault, memoryBlock: pBuffer, blockLength: Int(pLength), blockAllocator: kCFAllocatorNull, customBlockSource: nil, offsetToData: 0, dataLength: Int(pLength), flags: kCMBlockBufferAssureMemoryNowFlag, blockBufferOut: &blockBuffer)
let timeFormat = format.mSampleRate
let currentTime = CFAbsoluteTimeGetCurrent()
let elapsedTime: CFTimeInterval = currentTime - self.startTime
let timeStamp = CMTimeMake(value: Int64(elapsedTime * timeFormat), timescale: Int32(timeFormat))
let nSamples = Int(pLength / format.mBytesPerFrame)
do {
let formatDescription = try CMAudioFormatDescription(audioStreamBasicDescription: format)
var sampleBuffer: CMSampleBuffer?
CMAudioSampleBufferCreateWithPacketDescriptions(allocator: kCFAllocatorDefault, dataBuffer: blockBuffer, dataReady: true, makeDataReadyCallback: nil, refcon: nil, formatDescription: formatDescription, sampleCount: nSamples, presentationTimeStamp: timeStamp, packetDescriptions: nil, sampleBufferOut: &sampleBuffer)
if let sBuffer = sampleBuffer {
self.recognitionRequest?.appendAudioSampleBuffer(sBuffer)
}
} catch {
fatalError(error.localizedDescription)
}
}
}
UPD: I modified the code so it could be more descriptive
Finally, I've found the answer. Here's the code for the conversion of AudioQueueBufferRef into AVAudioPCMBuffer:
func queueBufferToAudioBuffer(_ buffer: AudioQueueBufferRef) -> AVAudioPCMBuffer? {
guard let audioFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: format.mSampleRate,
channels: format.mChannelsPerFrame,
interleaved: true)
else { return nil }
let frameLength = buffer.pointee.mAudioDataBytesCapacity / audioFormat.streamDescription.pointee.mBytesPerFrame
guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameLength) else { return nil }
audioBuffer.frameLength = frameLength
let dstLeft = audioBuffer.floatChannelData![0]
let src = buffer.pointee.mAudioData.bindMemory(to: Float.self, capacity: Int(frameLength))
dstLeft.initialize(from: src, count: Int(frameLength))
return audioBuffer
}
I fixed this by setting up the AVAudioSession before AudioQueueStart.
do{
try AVAudioSession.sharedInstance().setCategory(.record, mode:.default)
try AVAudioSession.sharedInstance().setActive(true)
} catch{
print(error)
}