MusicTrackSetDestNode equivalent for AVAudioUnitSampler (AVAudioEngine)?

By following this blog article, I was able to connect a MusicTrack in a MusicSequence to a desired AUNode in an AUGraph.
Since AUGraph is deprecated, I'm looking for an AVAudioEngine equivalent. Is there a way for a MusicTrack to play its MIDI notes through a separate, specific AVAudioUnitSampler?
Below is my code so far.
func createMusicSequence() -> MusicSequence {
    var musicSequence: MusicSequence?
    var status = NewMusicSequence(&musicSequence)
    if status != noErr {
        print("bad status when creating a musicSequence \(status)")
    }

    // add tracks
    var pianoTrack: MusicTrack?
    status = MusicSequenceNewTrack(musicSequence!, &pianoTrack)
    if status != noErr {
        print("error creating piano track \(status)")
    }
    var metronomeTrack: MusicTrack?
    status = MusicSequenceNewTrack(musicSequence!, &metronomeTrack)
    if status != noErr {
        print("error creating metronome track \(status)")
    }

    var time = MusicTimeStamp(0.0)
    for i: UInt8 in 60...80 {
        var mess = MIDINoteMessage(channel: 0,
                                   note: i,
                                   velocity: 64,
                                   releaseVelocity: 0,
                                   duration: 1.0)
        status = MusicTrackNewMIDINoteEvent(pianoTrack!, time, &mess)
        if status != noErr {
            print("failed to assign note to piano track")
        }

        mess = MIDINoteMessage(channel: 0,
                               note: 77,
                               velocity: 64,
                               releaseVelocity: 0,
                               duration: 1.0)
        status = MusicTrackNewMIDINoteEvent(metronomeTrack!, time, &mess)
        if status != noErr {
            print("failed to assign mess to the metronome track")
        }
        time += 1
    }

    // associating the musicSequence with the AUGraph
    MusicSequenceSetAUGraph(musicSequence!, processingGraph!)

    // in order for the track to be connected to the desired node, the below needs
    // to be configured after associating the musicSequence with the AUGraph
    MusicSequenceGetIndTrack(musicSequence!, 0, &pianoTrack)
    MusicTrackSetDestNode(pianoTrack!, pianoNode)
    MusicSequenceGetIndTrack(musicSequence!, 1, &metronomeTrack)
    MusicTrackSetDestNode(metronomeTrack!, metronomeNode)

    return musicSequence!
}
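One AVAudioEngine-native route that looks closest to MusicTrackSetDestNode is AVAudioSequencer, whose AVMusicTrack exposes a destinationAudioUnit property. A minimal sketch, not verified in this project, assuming pianoSampler and metronomeSampler are AVAudioUnitSamplers already attached to and connected within an AVAudioEngine called engine, and that the sequence above has been serialized to midiData (e.g. as Standard MIDI File data); the names are illustrative:

// Sketch: AVAudioSequencer with per-track destinationAudioUnit as the
// replacement for MusicSequenceSetAUGraph + MusicTrackSetDestNode.
let avSequencer = AVAudioSequencer(audioEngine: engine)
do {
    try avSequencer.load(from: midiData, options: [])

    // `tracks` excludes the tempo track; index 0 = piano, index 1 = metronome here.
    avSequencer.tracks[0].destinationAudioUnit = pianoSampler
    avSequencer.tracks[1].destinationAudioUnit = metronomeSampler

    avSequencer.prepareToPlay()
    try avSequencer.start()
} catch {
    print("sequencer error: \(error)")
}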
Edit
I came up with a solution for my problem using AudioKit.
class PlaySound {
    static let shared = PlaySound()

    // need to set Background Modes
    var pianoSampler = MIDISampler(name: "piano")
    var sequencer = AppleSequencer()
    var metronomeSampler = MIDISampler(name: "metronome")
    var mixer = Mixer()
    var engine = AudioEngine()

    private init() {
        setup()
        setSequence()
    }

    func play() {
        sequencer.play()
    }

    func setup() {
        mixer.addInput(pianoSampler)
        mixer.addInput(metronomeSampler)
        engine.output = mixer
        loadSF2(name: "Nice-Steinway-Lite-v3.0", ext: "sf2", preset: 0, sampler: pianoSampler)
        loadMetronome(name: "Metronom", ext: "sf2", preset: 48, sampler: metronomeSampler)
        do {
            try engine.start()
        } catch {
            print("error starting the engine: \(error)")
        }
    }

    func loadSF2(name: String, ext: String, preset: Int, sampler: MIDISampler) {
        guard let url = Bundle.main.url(forResource: name, withExtension: ext) else {
            print("Could not get SoundFont URL")
            return
        }
        do {
            try sampler.loadMelodicSoundFont(url: url, preset: preset)
        } catch {
            print("can not load SoundFont \(name) with error: \(error)")
        }
    }

    func loadMetronome(name: String, ext: String, preset: Int, sampler: MIDISampler) {
        do {
            try sampler.loadSoundFont(name, preset: preset, bank: 128, in: .main)
        } catch {
            print("can not load SoundFont \(name) with error: \(error)")
        }
    }

    func setSequence() {
        let pianoTrackManager = sequencer.newTrack("piano")
        let metronomeTrackManager = sequencer.newTrack("metronome")
        var time = Duration(beats: 0.0)
        for _ in 0...10 {
            let note = UInt8.random(in: 48...72)
            pianoTrackManager?.add(noteNumber: MIDINoteNumber(note), velocity: 64, position: time, duration: Duration(beats: 0.5))
            metronomeTrackManager?.add(noteNumber: 77, velocity: 64, position: time, duration: Duration(beats: 0.5))
            time += Duration(beats: 1.0)
        }
        sequencer.setTempo(30)
        pianoTrackManager?.setMIDIOutput(pianoSampler.midiIn)
        metronomeTrackManager?.setMIDIOutput(metronomeSampler.midiIn)
    }
}
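With this singleton in place, playback from anywhere in the app is just:

PlaySound.shared.play()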

Related

No speech detected in SwiftUI iOS

I am trying to implement a speech-to-text feature in my app, but I am facing this error:
Recognition error: Error Domain=kAFAssistantErrorDomain Code=1110 "No speech detected"
I am testing it on a real device.
Relevant code:
@State private var isRecording = false {
    didSet {
        if !isRecording { recognitionTask = nil }
    }
}
@State private var recognitionTask: SFSpeechRecognitionTask?
private func startRecording() {
isRecording = true
let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!
// Create an SFSpeechAudioBufferRecognitionRequest instance
let request = SFSpeechAudioBufferRecognitionRequest()
let audioEngine = AVAudioEngine()
let inputNode = audioEngine.inputNode
// Append audio samples to the request object
inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputNode.outputFormat(forBus: 0)) { (buffer, time) in
request.append(buffer)
}
// Start the audio engine and start recording
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("Audio engine error: ",error)
return
}
// Start recognizing speech from the audio samples
recognitionTask = speechRecognizer.recognitionTask(with: request) { (result, error) in
// Check for errors
if let error {
print("Recognition error: ",error)
return
}
guard let result else { return }
// Update the task variable with the transcription result
self.task = result.bestTranscription.formattedString
}
}
private func stopRecording() {
recognitionTask?.cancel()
isRecording = false
}
I then have two buttons to indicate whether it's recording, and to trigger the stopRecording() and startRecording() functions.
Here's the snippet:
if isRecording {
Button {
stopRecording()
} label: {
Image(systemName: "mic.circle.fill")
.resizable()
.frame(width: 25, height: 25)
.foregroundColor(.red)
}
} else {
Button {
startRecording()
} label: {
Image(systemName: "mic.circle.fill")
.resizable()
.frame(width: 25, height: 25)
}
}
Why do I get this error?
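One thing that stands out (an assumption based only on the snippet above, not verified in this project): speechRecognizer, request, and audioEngine are all local to startRecording(), so nothing keeps them alive once the function returns, and a released engine or request commonly surfaces as kAFAssistantErrorDomain Code=1110. A sketch of holding them in long-lived properties, e.g. an ObservableObject owned by the view (type and names are illustrative):

import AVFoundation
import Speech
import SwiftUI

// Hypothetical holder type so the engine, request, and task outlive startRecording().
final class SpeechController: ObservableObject {
    @Published var transcript = ""

    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!
    private let audioEngine = AVAudioEngine()
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    func startRecording() throws {
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true
        self.request = request

        // Feed mic buffers into the request.
        let inputNode = audioEngine.inputNode
        inputNode.installTap(onBus: 0, bufferSize: 1024,
                             format: inputNode.outputFormat(forBus: 0)) { buffer, _ in
            request.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()

        recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in
            if let result {
                DispatchQueue.main.async { self?.transcript = result.bestTranscription.formattedString }
            }
            if error != nil { self?.stopRecording() }
        }
    }

    func stopRecording() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        request?.endAudio()
        recognitionTask?.cancel()
        recognitionTask = nil
        request = nil
    }
}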

Has recent update affected genData in AudioKit?

*** Update 2 ***
Ok, I've done some more digging and managed to get things working with MIKMIDI by starting at position 1 rather than position 0; the same fix hasn't worked with AudioKit.
Further, I've created a new (admittedly ugly) app that replicates the behaviour across both frameworks and outputs, among other things, the track data, and there is definitely a difference between them. I include the code below for perusal. You'll need both MIKMIDI and AudioKit available as frameworks, plus a SoundFont. Both appear to behave identically, but the generated data is different. Again, it could be that I'm making a fundamental error, in which case I apologise, but if anyone can point out the issue I'd be grateful. Many thanks.
import SwiftUI
import MIKMIDI
import AudioKit
let MIKsequence = MIKMIDISequence()
let MIKsequencer = MIKMIDISequencer()
var AKoutputSampler = MIDISampler(name: "output")
var AKsequencer = AppleSequencer()
var AKmixer = Mixer()
var AKengine = AudioEngine()
func AKsetup() {
print("AK Setup Start---------")
AKmixer.addInput(AKoutputSampler)
AKengine.output = AKmixer
do {
try AKengine.start()
} catch {
print("error starting the engine: \(error)")
}
print("AK Setup End ----------")
}
func AKinitialise(){
print("AK Initilise Start --------")
AKsequencer = AppleSequencer()
for t in 0..<AKsequencer.tracks.count {
AKsequencer.deleteTrack(trackIndex: t)
}
let AKtrackManager = AKsequencer.newTrack("piano")
for note in 1..<6{
AKtrackManager?.add(noteNumber: MIDINoteNumber(note+60), velocity: 100, position: Duration(beats: Double(note * 16)/16), duration: Duration(beats: 0.25),channel: 1)
}
let length = AKtrackManager?.length
print("Length = \(length)")
let mnd : [MIDINoteData] = (AKtrackManager?.getMIDINoteData())!
for d in mnd {
print("Note \(d.noteNumber), position \(d.position.seconds)")
}
AKsequencer.setLength(Duration(beats: Double(length!)))
AKsequencer.disableLooping()
AKsequencer.setTempo(120)
AKsequencer.addTimeSignatureEvent(timeSignature: TimeSignature(topValue: 4, bottomValue: .four))
AKtrackManager?.setMIDIOutput(AKoutputSampler.midiIn)
let hexValues = AKsequencer.genData()!.map { String(format: "%02X", $0) }
print(hexValues.joined(separator: " "))
AKsequencer.debug()
print("AK Initialise End ---------")
}
func loadSF2(name: String, ext: String, preset: Int, sampler: MIDISampler) {
print("Load SF2 Start")
guard let url = Bundle.main.url(forResource: name, withExtension: ext) else {
print("LoadSF2: Could not get SoundFont URL")
return
}
do {
try sampler.loadMelodicSoundFont(url: url, preset: preset)
} catch {
print("can not load SoundFont \(name) with error: \(error)")
}
print("Load SF2 End")
}
func AKplay() {
AKengine.stop()
loadSF2(name: "Chaos Bank", ext: "sf2", preset: 1, sampler: AKoutputSampler)
do {
try AKengine.start()
} catch {
print("error starting the engine: \(error)")
}
AKsequencer.play()
}
func AKstop(){
AKsequencer.stop()
AKsequencer.rewind()
}
func MIKinitialise(){
print("MIK Initialise Start")
do {
let tempo = 120.0
let signature = MIKMIDITimeSignature(numerator: 4, denominator: 4)
MIKsequence.setOverallTempo(tempo)
MIKsequence.setOverallTimeSignature(signature)
for t in MIKsequence.tracks {
MIKsequence.removeTrack(t)
}
let _ = try MIKsequence.addTrack()
let track = MIKsequence.tracks[0]
let trackSynth = MIKsequencer.builtinSynthesizer(for: track)
if let soundfont = Bundle.main.url(forResource: "Chaos Bank", withExtension: "sf2") {
do {
try trackSynth?.loadSoundfontFromFile(at: soundfont)
} catch {
print("can not load SoundFont with error: \(error)")
}
let instrumentId = MIKMIDISynthesizerInstrument(id: 10, name: "Eric")
try trackSynth!.selectInstrument(instrumentId!)
print("Available Instruments \(trackSynth!.availableInstruments)")
}
var notes = [MIKMIDINoteEvent]()
for n in 1..<6 {
let note = MIKMIDINoteEvent(timeStamp:Double(n),note:UInt8(60 + n),velocity:100,duration:0.25,channel:1)
notes.append(note)
}
track.addEvents(notes)
let length = track.length
MIKsequence.length = length
MIKsequencer.sequence = MIKsequence
print("Duration in seconds \(MIKsequencer.sequence.durationInSeconds)")
print("Tempo Track \(MIKsequence.tempoTrack.length), \(MIKsequence.tempoTrack.notes.count)")
for t in MIKsequence.tracks {
print("Track Number \(t.trackNumber)")
for notes in t.notes {
print("Note \(notes.note), \(notes.duration), \(notes.timeStamp)")
}
}
let hexValues = MIKsequencer.sequence.dataValue!.map { String(format: "%02X", $0) }
print(hexValues.joined(separator: " "))
} catch let error {
print(error.localizedDescription)
}
print("MIK Initialise End")
}
func startMIKPlayback(){
MIKsequencer.startPlayback()
}
func stopMIKPlayback(){
MIKsequencer.stop()
}
func getDocumentsDirectory() -> URL {
// find all possible documents directories for this user
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
// just send back the first one, which ought to be the only one
print(paths[0])
return paths[0]
}
func saveMIKFile()->String{
let date = Date()
let dateFormatter = DateFormatter()
dateFormatter.dateFormat = "YYYYMMddHHmmss"
let filename = dateFormatter.string(from: date) + " MIK Song.mid"
try! MIKsequence.write(to: getDocumentsDirectory().appendingPathComponent(filename))
return getDocumentsDirectory().absoluteString
}
func saveAudioKitFile()->String{
let date = Date()
let dateFormatter = DateFormatter()
dateFormatter.dateFormat = "YYYYMMddHHmmss"
let filename = dateFormatter.string(from: date) + "AK Song.mid"
try! AKsequencer.genData()!.write(to: getDocumentsDirectory().appendingPathComponent(filename))
return getDocumentsDirectory().absoluteString
}
struct ContentView: View {
init(){
AKsetup()
MIKinitialise()
}
var body: some View {
HStack{
VStack {
Text("MIKMIDI Test 01")
Button("Play", action: startMIKPlayback)
Button("Stop", action: stopMIKPlayback)
Button("Save") {
let _ = print(saveMIKFile())
}
}
.padding()
VStack {
Text("AudioKit Test 01")
let _ = AKinitialise()
Button("Play", action: AKplay)
Button("Stop", action: AKstop)
Button("Save") {
let _ = print(saveAudioKitFile())
}
}
.padding()
}
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
*** End Update 2 ***
*** Update ***
Still having the same problem, and now I've tried with both AudioKit and MIKMIDI. I've run the generated file through an online MIDI analyzer and it says "Undefined Variable: Last". I've reached out to both the MIKMIDI and MIDI-Analyzer authors to see if they can assist, but if anyone can throw light on this issue I'd be grateful.
*** End Update ***
I'm working on an app that saves a MIDI sequence to file using AudioKit and genData(). However, it seems that a recent update - either the OS or AudioKit - has affected the way things save.
The start note now seems to be offset on tracks by a varying amount, and the rest of the track then follows that offset. Often the end notes of the track are missing. This problem was not occurring until recently.
Dumping the sequence shows the data in the correct positions, but it's coming out like this (the notes should be starting at position 0 in the track):
(Screenshot: pattern offset shown in GarageBand)
I've also had difficulty importing the same MIDI file into other packages; again, this wasn't a problem until recently.
I'm happy to be told I'm doing something amiss but, as I've said, it seems to have been working up until recently.
Any help would be really appreciated.
func getDocumentsDirectory() -> URL {
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
return paths[0]
}
func saveFile()->String{
try! sequencer.genData()!.write(to: getDocumentsDirectory().appendingPathComponent("QuickTestmidi.mid"))
return getDocumentsDirectory().absoluteString
}
func setSequence_01(){
var trackLength = 0.0
sequencer.tracks[0].setMIDIOutput(outputSampler[0].midiIn)
for track in sequencer.tracks {
track.clear()
}
for i in 0..<16 {
sequencer.tracks[0].add(noteNumber: MIDINoteNumber(96 + i), velocity: MIDIVelocity(100), position: Duration(beats: Double(2 * i)), duration: Duration(beats: 1),channel: 1)
trackLength = Double(sequencer.tracks[0].length)
sequencer.setLength(Duration(beats:trackLength))
sequencer.enableLooping()
sequencer.setTempo(120)
sequencer.addTimeSignatureEvent(timeSignature: TimeSignature(topValue: 4, bottomValue: .four))
}
}

AudioKit Conflict between Midi Instrument and Mic behavior

I am trying to make my app produce MIDI notes while simultaneously listening to input from the mic:
var engine = AudioEngine()
var initialDevice: Device!
var mic: AudioEngine.InputNode!
var tappableNodeA: Fader!
var tappableNodeB: Fader!
var tappableNodeC: Fader!
var silence: Fader!
var tracker: PitchTap!
private var instrument = MIDISampler(name: "Instrument 1")
func noteOn(note: MIDINoteNumber) {
instrument.play(noteNumber: note, velocity: 90, channel: 0)
}
func noteOff(note: MIDINoteNumber) {
instrument.stop(noteNumber: note, channel: 0)
}
override func viewDidLoad() {
super.viewDidLoad()
print("init started ")
guard let input = engine.input else { fatalError() }
guard let device = engine.inputDevice else { fatalError() }
print("input selected")
initialDevice = device
engine.output = instrument
mic = input
tappableNodeA = Fader(mic)
tappableNodeB = Fader(tappableNodeA)
tappableNodeC = Fader(tappableNodeB)
silence = Fader(tappableNodeC, gain: 0)
engine.output = silence
print("objects init")
tracker = PitchTap(mic) { pitch, amp in
DispatchQueue.main.async {
self.update(pitch[0], amp[0])
}
}
start()
// other init that are not related
}
The start function is written below:
func start() {
do {
if let fileURL = Bundle.main.url(forResource: "Sounds/Sampler Instruments/sawPiano1", withExtension: "exs") {
try instrument.loadInstrument(url: fileURL)
} else {
Log("Could not find file")
}
} catch {
Log("Could not load instrument")
}
do {
try engine.start()
tracker.start()
} catch let err {
print("caught error at start")
Log(err)
}
}
As soon as I make the first try call to set up the instrument, I get the following error:
*** Terminating app due to uncaught exception 'com.apple.coreaudio.avfaudio', reason: 'required condition is false: _engine != nil'
Why would the condition be false?
Ok, so the solution was to separate the calls into two functions and to position the first call before the tap-node configuration:
var engine = AudioEngine()
var initialDevice: Device!
var mic: AudioEngine.InputNode!
var tappableNodeA: Fader!
var tappableNodeB: Fader!
var tappableNodeC: Fader!
var silence: Fader!
var tracker: PitchTap!
private var instrument = MIDISampler(name: "Instrument 1")
func noteOn(note: MIDINoteNumber) {
instrument.play(noteNumber: note, velocity: 90, channel: 0)
}
func noteOff(note: MIDINoteNumber) {
instrument.stop(noteNumber: note, channel: 0)
}
override func viewDidLoad() {
super.viewDidLoad()
print("init started ")
guard let input = engine.input else { fatalError() }
guard let device = engine.inputDevice else { fatalError() }
print("input selected")
initialDevice = device
engine.output = instrument
start1()
mic = input
tappableNodeA = Fader(mic)
tappableNodeB = Fader(tappableNodeA)
tappableNodeC = Fader(tappableNodeB)
silence = Fader(tappableNodeC, gain: 0)
engine.output = silence
print("objects init")
tracker = PitchTap(mic) { pitch, amp in
DispatchQueue.main.async {
self.update(pitch[0], amp[0])
}
}
start()
// other init that are not related
}
func start1(){
do {
if let fileURL = Bundle.main.url(forResource: "Sounds/Sampler Instruments/sawPiano1", withExtension: "exs") {
try instrument.loadInstrument(url: fileURL)
} else {
Log("Could not find file")
}
} catch let err {
Log("Could not load instrument")
Log(err)
}
}
func start() {
do {
try engine.start()
tracker.start()
} catch let err {
print("caught error at start")
Log(err)
}
}
Although the exception is now gone, there is still no sound being played for some reason.
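For the missing sound, one possibility (an assumption based only on the code shown, not verified): engine.output is assigned twice, so the later assignment of silence replaces the instrument in the signal chain. Mixing both into a single output keeps the sampler connected; a sketch:

// Keep both the MIDI instrument and the silenced mic chain in the graph
// by mixing them into one output node (AudioKit's Mixer accepts multiple inputs).
let outputMixer = Mixer(instrument, silence)
engine.output = outputMixer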

AVFoundation: route audio between two non-system inputs and outputs

I've been trying to route audio from a virtual Soundflower device to another hardware speaker. The Soundflower virtual device is my system output. I want my AVAudioEngine to take the Soundflower input and output it to the hardware speaker.
However, from my research it seems AVAudioEngine only supports RIO devices. I've looked at AudioKit and the Output Splitter example; however, I was getting crackling and unsatisfactory results. The bones of my code are as follows:
static func set(device: String, isInput: Bool, toUnit unit: AudioUnit) -> Int {
let devs = (isInput ? EZAudioDevice.inputDevices() : EZAudioDevice.outputDevices()) as! [EZAudioDevice]
let mic = devs.first(where: { $0.name == device})!
var inputID = mic.deviceID // replace with actual, dynamic value
AudioUnitSetProperty(unit, kAudioOutputUnitProperty_CurrentDevice,
kAudioUnitScope_Global, 0, &inputID, UInt32(MemoryLayout<AudioDeviceID>.size))
return Int(inputID)
}
let outputRenderCallback: AURenderCallback = {
(inRefCon: UnsafeMutableRawPointer,
ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
inTimeStamp: UnsafePointer<AudioTimeStamp>,
inBusNumber: UInt32,
inNumberFrames: UInt32,
ioData: UnsafeMutablePointer<AudioBufferList>?) -> OSStatus in
// Get Refs
let buffer = UnsafeMutableAudioBufferListPointer(ioData)
let engine = Unmanaged<Engine>.fromOpaque(inRefCon).takeUnretainedValue()
// If Engine hasn't saved any data yet just output silence
if (engine.latestSampleTime == nil) {
//makeBufferSilent(buffer!)
return noErr
}
// Read the latest available Sample
let sampleTime = engine.latestSampleTime
if let err = checkErr(engine.ringBuffer.fetch(ioData!, framesToRead: inNumberFrames, startRead: sampleTime!).rawValue) {
//makeBufferSilent(buffer!)
return err
}
return noErr
}
private let trailEngine: AVAudioEngine
private let subEngine: AVAudioEngine
init() {
subEngine = AVAudioEngine()
let inputUnit = subEngine.inputNode.audioUnit!
print(Engine.set(device: "Soundflower (2ch)", isInput: true, toUnit: inputUnit))
trailEngine = AVAudioEngine()
let outputUnit = trailEngine.outputNode.audioUnit!
print(Engine.set(device: "Boom 3", isInput: false, toUnit: outputUnit))
subEngine.inputNode.installTap(onBus: 0, bufferSize: 2048, format: nil) { [weak self] (buffer, time) in
guard let self = self else { return }
let sampleTime = time.sampleTime
self.latestSampleTime = sampleTime
// Write to RingBuffer
if let _ = checkErr(self.ringBuffer.store(buffer.audioBufferList, framesToWrite: 2048, startWrite: sampleTime).rawValue) {
//makeBufferSilent(UnsafeMutableAudioBufferListPointer(buffer.mutableAudioBufferList))
}
}
var renderCallbackStruct = AURenderCallbackStruct(
inputProc: outputRenderCallback,
inputProcRefCon: UnsafeMutableRawPointer(Unmanaged<Engine>.passUnretained(self).toOpaque())
)
if let _ = checkErr(
AudioUnitSetProperty(
trailEngine.outputNode.audioUnit!,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Global,
0,
&renderCallbackStruct,
UInt32(MemoryLayout<AURenderCallbackStruct>.size)
)
) {
return
}
subEngine.prepare()
trailEngine.prepare()
ringBuffer = RingBuffer<Float>(numberOfChannels: 2, capacityFrames: UInt32(4800 * 20))
do {
try self.subEngine.start()
} catch {
print("Error starting the input engine: \(error)")
}
DispatchQueue.main.asyncAfter(deadline: .now() + 0.01) {
do {
try self.trailEngine.start()
} catch {
print("Error starting the output engine: \(error)")
}
}
}
For reference, the RingBuffer implementation is at:
https://github.com/vgorloff/CARingBuffer
and the AudioKit example is at:
https://github.com/AudioKit/OutputSplitter/tree/master/OutputSplitter
I was using AudioKit 4 (though the example only uses AudioKit's device wrappers). The result of this code is extremely crackly audio through the speakers, which suggests the signal is getting completely mangled in the transfer between the two engines. I am not too worried about latency between the two engines.
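One thing that may be worth ruling out (a guess, not verified against this setup): the tap on the Soundflower input and the render callback on the output engine have to agree on sample rate and channel count, otherwise the ring-buffer hand-off alone will produce exactly this kind of crackling. A quick check:

// Compare the producer (tap) and consumer (render callback) formats.
let inputFormat = subEngine.inputNode.outputFormat(forBus: 0)
let outputFormat = trailEngine.outputNode.inputFormat(forBus: 0)

if inputFormat.sampleRate != outputFormat.sampleRate ||
    inputFormat.channelCount != outputFormat.channelCount {
    print("Format mismatch: \(inputFormat) vs \(outputFormat); convert with AVAudioConverter before writing to the ring buffer")
}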

Using AudioToolbox instead of AVFoundation in SFSpeechRecognizer

I have to use AudioToolbox instead of AVAudioSession to provide the stream to SFSpeechRecognizer. I know that I should use AudioQueue, so I record audio, export it to a CMSampleBuffer, and read it with the recognizer. While debugging I can see that the buffer is added to the SFSpeechAudioBufferRecognitionRequest, but the code in the task closure never executes: there is neither a result nor an error.
What's wrong with the code?
let NUM_BUFFERS = 1
struct RecordState {
var dataFormat = AudioStreamBasicDescription()
var queue: AudioQueueRef?
var buffers: [AudioQueueBufferRef] = []
var audioFile: AudioFileID?
var currentPacket: Int64 = 0
var recording = false
}
func callback(_ inUserData: UnsafeMutableRawPointer?,
_ inAQ: AudioQueueRef,
_ inBuffer: AudioQueueBufferRef,
_ inStartTime: UnsafePointer<AudioTimeStamp>,
_ inNumberPacketDescriptions: UInt32,
_ inPacketDescs: UnsafePointer<AudioStreamPacketDescription>?) {
let recordState = inUserData?.assumingMemoryBound(to: RecordState.self)
if let queue = recordState?.pointee.queue {
AudioQueueEnqueueBuffer(queue, inBuffer, 0, nil)
let rec = AudioRecorder.sharedInstance
rec.transformBuffer(pBuffer: inBuffer, pLength: inBuffer.pointee.mAudioDataByteSize)
}
}
class AudioRecorder: NSObject, ObservableObject, SFSpeechRecognizerDelegate {
let format = AudioStreamBasicDescription(mSampleRate: Float64(16000.0), mFormatID: kAudioFormatLinearPCM, mFormatFlags: kAudioFormatFlagsNativeFloatPacked, mBytesPerPacket: UInt32(MemoryLayout<Float32>.size), mFramesPerPacket: 1, mBytesPerFrame: UInt32(MemoryLayout<Float32>.size), mChannelsPerFrame: 1, mBitsPerChannel: UInt32(MemoryLayout<Float32>.size * 8), mReserved: 0)
var recordState = RecordState()
var startTime = CFAbsoluteTimeGetCurrent()
static var sharedInstance = AudioRecorder()
private var speechRecognizer = SFSpeechRecognizer()!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var engineEnabled = false
private var lastText = [SFTranscriptionSegment]()
override init() {
super.init()
OperationQueue.main.addOperation {
SFSpeechRecognizer.requestAuthorization { authStatus in
switch authStatus {
case .authorized:
self.engineEnabled = true
default:
self.engineEnabled = false
}
}
}
self.speechRecognizer.delegate = self
}
func startRecording() {
recordState.dataFormat = format
var queue: AudioQueueRef?
if AudioQueueNewInput(&recordState.dataFormat, callback, &recordState, CFRunLoopGetCurrent(), CFRunLoopMode.commonModes.rawValue, 0, &queue) == noErr {
recordState.queue = queue
} else {
return
}
for _ in 0..<NUM_BUFFERS {
var buffer: AudioQueueBufferRef?
if AudioQueueAllocateBuffer(queue!, 1024, &buffer) == noErr {
recordState.buffers.append(buffer!)
}
AudioQueueEnqueueBuffer(queue!, buffer!, 0, nil)
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
recognitionRequest.shouldReportPartialResults = true
// Keep speech recognition data on device
if #available(iOS 13, *) {
recognitionRequest.requiresOnDeviceRecognition = true
}
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
print(result.bestTranscription.formattedString)
isFinal = result.isFinal
}
if error != nil || isFinal {
// Stop recognizing speech if there is a problem.
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
recordState.recording = true
if AudioQueueStart(recordState.queue!, nil) != noErr {
fatalError("Something is wrong")
}
self.startTime = CFAbsoluteTimeGetCurrent()
}
func stopRecording() {
recordState.recording = false
AudioQueueStop(recordState.queue!, true)
for i in 0..<NUM_BUFFERS {
if let buffers = recordState.buffers[i] as? AudioQueueBufferRef {
AudioQueueFreeBuffer(recordState.queue!, buffers)
}
}
AudioQueueDispose(recordState.queue!, true)
if let file = recordState.audioFile {
AudioFileClose(file)
}
}
func transformBuffer(pBuffer: AudioQueueBufferRef, pLength: UInt32) {
var blockBuffer: CMBlockBuffer?
CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault, memoryBlock: pBuffer, blockLength: Int(pLength), blockAllocator: kCFAllocatorNull, customBlockSource: nil, offsetToData: 0, dataLength: Int(pLength), flags: kCMBlockBufferAssureMemoryNowFlag, blockBufferOut: &blockBuffer)
let timeFormat = format.mSampleRate
let currentTime = CFAbsoluteTimeGetCurrent()
let elapsedTime: CFTimeInterval = currentTime - self.startTime
let timeStamp = CMTimeMake(value: Int64(elapsedTime * timeFormat), timescale: Int32(timeFormat))
let nSamples = Int(pLength / format.mBytesPerFrame)
do {
let formatDescription = try CMAudioFormatDescription(audioStreamBasicDescription: format)
var sampleBuffer: CMSampleBuffer?
CMAudioSampleBufferCreateWithPacketDescriptions(allocator: kCFAllocatorDefault, dataBuffer: blockBuffer, dataReady: true, makeDataReadyCallback: nil, refcon: nil, formatDescription: formatDescription, sampleCount: nSamples, presentationTimeStamp: timeStamp, packetDescriptions: nil, sampleBufferOut: &sampleBuffer)
if let sBuffer = sampleBuffer {
self.recognitionRequest?.appendAudioSampleBuffer(sBuffer)
}
} catch {
fatalError(error.localizedDescription)
}
}
}
UPD: I modified the code to make it more descriptive.
Finally, I've found the answer. Here's the code for converting an AudioQueueBufferRef into an AVAudioPCMBuffer:
func queueBufferToAudioBuffer(_ buffer: AudioQueueBufferRef) -> AVAudioPCMBuffer? {
guard let audioFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: format.mSampleRate,
channels: format.mChannelsPerFrame,
interleaved: true)
else { return nil }
let frameLength = buffer.pointee.mAudioDataBytesCapacity / audioFormat.streamDescription.pointee.mBytesPerFrame
guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameLength) else { return nil }
audioBuffer.frameLength = frameLength
let dstLeft = audioBuffer.floatChannelData![0]
let src = buffer.pointee.mAudioData.bindMemory(to: Float.self, capacity: Int(frameLength))
dstLeft.initialize(from: src, count: Int(frameLength))
return audioBuffer
}
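With that helper, the converted buffer can be fed to the recognizer with append(_:), which avoids the CMSampleBuffer path entirely; a sketch of what transformBuffer could reduce to (same class as above, not verified in this project):

func transformBuffer(pBuffer: AudioQueueBufferRef, pLength: UInt32) {
    // Convert the AudioQueue buffer and hand it straight to the speech request.
    guard let pcmBuffer = queueBufferToAudioBuffer(pBuffer) else { return }
    recognitionRequest?.append(pcmBuffer)
}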
I fixed this by setting up the AVAudioSession before AudioQueueStart.
do{
try AVAudioSession.sharedInstance().setCategory(.record, mode:.default)
try AVAudioSession.sharedInstance().setActive(true)
} catch{
print(error)
}