Change sample rate with AudioConverter - Swift

I am trying to resample the input audio from 44.1 kHz to 48 kHz. My steps are:
1. Capture the input using AudioToolbox's AUAudioUnit.inputHandler.
2. Write the 44.1 kHz input out to a WAV file (this is working perfectly).
3. Convert the 44.1 kHz audio to 48 kHz and write the converted bytes to a file, using AudioConverterFillComplexBuffer: https://developer.apple.com/documentation/audiotoolbox/1503098-audioconverterfillcomplexbuffer
The problem is in the third step: after writing the converted audio to a file, the voice is very noisy.
Here is my code:
// convert to 48kHz
var audioConverterRef: AudioConverterRef?
CheckError(AudioConverterNew(&self.hardwareFormat,
                             &self.convertingFormat,
                             &audioConverterRef), "AudioConverterNew failed")

let outputBufferSize = inNumBytes
let outputBuffer = UnsafeMutablePointer<Int16>.allocate(capacity: MemoryLayout<Int16>.size * Int(outputBufferSize))

let convertedData = AudioBufferList.allocate(maximumBuffers: 1)
convertedData[0].mNumberChannels = self.hardwareFormat.mChannelsPerFrame
convertedData[0].mDataByteSize = outputBufferSize
convertedData[0].mData = UnsafeMutableRawPointer(outputBuffer)

var ioOutputDataPackets = UInt32(inNumPackets)
CheckError(AudioConverterFillComplexBuffer(audioConverterRef!,
                                           self.coverterCallback,
                                           &bufferList,
                                           &ioOutputDataPackets,
                                           convertedData.unsafeMutablePointer,
                                           nil), "AudioConverterFillComplexBuffer error")

let convertedmData = convertedData[0].mData!
let convertedmDataByteSize = convertedData[0].mDataByteSize

// Write converted packets to file -> audio_unit_int16_48.wav
CheckError(AudioFileWritePackets(self.outputFile48000!,
                                 false,
                                 convertedmDataByteSize,
                                 nil,
                                 recordPacket,
                                 &ioOutputDataPackets,
                                 convertedmData), "AudioFileWritePackets error")
and the conversion callback body is here:
let buffers = UnsafeMutableBufferPointer<AudioBuffer>(start: &bufferList.mBuffers, count: Int(bufferList.mNumberBuffers))
let dataPtr = UnsafeMutableAudioBufferListPointer(ioData)
dataPtr[0].mNumberChannels = 1
dataPtr[0].mData = buffers[0].mData
dataPtr[0].mDataByteSize = buffers[0].mDataByteSize
ioDataPacketCount.pointee = buffers[0].mDataByteSize / UInt32(MemoryLayout<Int16>.size)
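For reference, AudioConverterFillComplexBuffer pulls its input through that callback, and it may call it more than once per fill; a callback that keeps handing back the same captured buffer can produce garbled, noisy output. Below is a minimal sketch of the usual pull pattern (supply the captured buffer exactly once, then report that no more data is available). The ConverterContext type, the custom status code, and the inUserData plumbing are illustrative assumptions, not code from the project above.
import AudioToolbox

// Context handed to the converter through the inUserData pointer (illustrative).
struct ConverterContext {
    var bufferList: AudioBufferList   // the buffer captured from the input handler
    var bufferConsumed = false
}

// Arbitrary non-zero status meaning "the single captured buffer was already delivered".
let kNoMoreDataErr: OSStatus = -10001

func converterInputProc(_ converter: AudioConverterRef,
                        _ ioNumberDataPackets: UnsafeMutablePointer<UInt32>,
                        _ ioData: UnsafeMutablePointer<AudioBufferList>,
                        _ outPacketDescriptions: UnsafeMutablePointer<UnsafeMutablePointer<AudioStreamPacketDescription>?>?,
                        _ inUserData: UnsafeMutableRawPointer?) -> OSStatus {
    let context = inUserData!.assumingMemoryBound(to: ConverterContext.self)
    // After the captured buffer has been handed over once, report "no more data"
    // so the converter stops pulling instead of re-reading stale bytes.
    if context.pointee.bufferConsumed {
        ioNumberDataPackets.pointee = 0
        return kNoMoreDataErr
    }
    let source = context.pointee.bufferList.mBuffers   // single mono buffer
    let dest = UnsafeMutableAudioBufferListPointer(ioData)
    dest[0].mNumberChannels = source.mNumberChannels
    dest[0].mData = source.mData
    dest[0].mDataByteSize = source.mDataByteSize
    ioNumberDataPackets.pointee = source.mDataByteSize / UInt32(MemoryLayout<Int16>.size)
    context.pointee.bufferConsumed = true
    return noErr
}
With this pattern, the fill call returns the packets converted so far and then reports the custom status, which the caller can treat as "input exhausted" rather than a real error.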
the sample project is here: https://drive.google.com/file/d/1GvCJ5hEqf7PsBANwUpVTRE1L7S_zQxnL/view?usp=sharing

If part of your chain is still AVAudioEngine, there's sample code from Apple for offline processing of AVAudioFiles.
Here's a modified version that includes the sampleRate change:
import Cocoa
import AVFoundation
import PlaygroundSupport
let outputSampleRate = 48_000.0
let outputAudioFormat = AVAudioFormat(standardFormatWithSampleRate: outputSampleRate, channels: 2)!
// file needs to be in ~/Documents/Shared Playground Data
let localURL = playgroundSharedDataDirectory.appendingPathComponent("inputFile_44.aiff")
let outputURL = playgroundSharedDataDirectory.appendingPathComponent("outputFile_48.aiff")
let sourceFile: AVAudioFile
let format: AVAudioFormat
do {
    sourceFile = try AVAudioFile(forReading: localURL)
    format = sourceFile.processingFormat
} catch {
    fatalError("Unable to load the source audio file: \(error.localizedDescription).")
}
let sourceSettings = sourceFile.fileFormat.settings
var outputSettings = sourceSettings
outputSettings[AVSampleRateKey] = outputSampleRate
let engine = AVAudioEngine()
let player = AVAudioPlayerNode()
engine.attach(player)
// Connect the nodes.
engine.connect(player, to: engine.mainMixerNode, format: format)
// Schedule the source file.
player.scheduleFile(sourceFile, at: nil)
do {
    // The maximum number of frames the engine renders in any single render call.
    let maxFrames: AVAudioFrameCount = 4096
    try engine.enableManualRenderingMode(.offline, format: outputAudioFormat,
                                         maximumFrameCount: maxFrames)
} catch {
    fatalError("Enabling manual rendering mode failed: \(error).")
}
do {
    try engine.start()
    player.play()
} catch {
    fatalError("Unable to start audio engine: \(error).")
}
let buffer = AVAudioPCMBuffer(pcmFormat: engine.manualRenderingFormat, frameCapacity: engine.manualRenderingMaximumFrameCount)!
var outputFile: AVAudioFile?
do {
    outputFile = try AVAudioFile(forWriting: outputURL, settings: outputSettings)
} catch {
    fatalError("Unable to open output audio file: \(error).")
}
let outputLengthD = Double(sourceFile.length) * outputSampleRate / sourceFile.fileFormat.sampleRate
let outputLength = Int64(ceil(outputLengthD)) // no sample left behind
while engine.manualRenderingSampleTime < outputLength {
    do {
        let frameCount = outputLength - engine.manualRenderingSampleTime
        let framesToRender = min(AVAudioFrameCount(frameCount), buffer.frameCapacity)
        let status = try engine.renderOffline(framesToRender, to: buffer)
        switch status {
        case .success:
            // The data rendered successfully. Write it to the output file.
            try outputFile?.write(from: buffer)
        case .insufficientDataFromInputNode:
            // Applicable only when using the input node as one of the sources.
            break
        case .cannotDoInCurrentContext:
            // The engine couldn't render in the current render call.
            // Retry in the next iteration.
            break
        case .error:
            // An error occurred while rendering the audio.
            fatalError("The manual rendering failed.")
        }
    } catch {
        fatalError("The manual rendering failed: \(error).")
    }
}
// Stop the player node and engine.
player.stop()
engine.stop()
outputFile = nil // AVAudioFile won't close until it goes out of scope, so we set output file back to nil here
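As a quick sanity check (not part of Apple's sample), you can reopen the rendered file and confirm that it really came out at 48 kHz:
do {
    let rendered = try AVAudioFile(forReading: outputURL)
    print(rendered.fileFormat.sampleRate)   // expected: 48000.0
} catch {
    print("Could not reopen rendered file: \(error)")
}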

Related

Recording speech synthesis to a saved file

Below is the code I've put together to attempt to take a phrase, save it to a file, then play that saved file. I'm not sure which part isn't working (incorrect file name, not saving the file, or not finding the file). Any help would be appreciated. (speakPhrase is just a helper function to confirm that the speech synthesizer actually works, which it does.)
import AVFoundation
import Foundation
class Coordinator {
    let synthesizer: AVSpeechSynthesizer
    var player: AVAudioPlayer?

    init() {
        let synthesizer = AVSpeechSynthesizer()
        self.synthesizer = synthesizer
    }

    var recordingPath: URL {
        let soundName = "Finally.caf"
        // I've tried numerous file extensions. .caf was in an answer somewhere else. I would think it would be
        // .pcm, but that doesn't work either.
        // Local Directory
        let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
        return paths[0].appendingPathComponent(soundName)
    }

    func speakPhrase(phrase: String) {
        let utterance = AVSpeechUtterance(string: phrase)
        utterance.voice = AVSpeechSynthesisVoice(language: "en")
        synthesizer.speak(utterance)
    }

    func playFile() {
        print("Trying to play the file")
        do {
            try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default)
            try AVAudioSession.sharedInstance().setActive(true)
            player = try AVAudioPlayer(contentsOf: recordingPath, fileTypeHint: AVFileType.caf.rawValue)
            guard let player = player else { return }
            player.play()
        } catch {
            print("Error playing file.")
        }
    }

    func saveAVSpeechUtteranceToFile() {
        let utterance = AVSpeechUtterance(string: "This is speech to record")
        utterance.voice = AVSpeechSynthesisVoice(language: "en-US")
        utterance.rate = 0.50
        synthesizer.write(utterance) { [self] (buffer: AVAudioBuffer) in
            guard let pcmBuffer = buffer as? AVAudioPCMBuffer else {
                fatalError("unknown buffer type: \(buffer)")
            }
            if pcmBuffer.frameLength == 0 {
                // Done
            } else {
                // append buffer to file
                do {
                    let audioFile = try AVAudioFile(forWriting: recordingPath, settings: pcmBuffer.format.settings, commonFormat: .pcmFormatInt16, interleaved: false)
                    try audioFile.write(from: pcmBuffer)
                } catch {
                    print(error.localizedDescription)
                }
            }
        }
    }
}
Did you notice that the bufferCallback in the function below is called multiple times?
func write(_ utterance: AVSpeechUtterance, toBufferCallback bufferCallback: @escaping AVSpeechSynthesizer.BufferCallback)
So the root cause is pretty simple: the AVSpeechUtterance's audio is delivered in multiple parts. On my iPhone, the callback was called about 20 times.
So if you create a new audio file in the closure every time, you end up with a very tiny audio file (on my iPhone it was a 6 KB file), which is barely audible if you play it.
So replace the function with:
func saveAVSpeechUtteranceToFile() {
    let utterance = AVSpeechUtterance(string: "This is speech to record")
    utterance.voice = AVSpeechSynthesisVoice(language: "en-US")
    utterance.rate = 0.50

    // Only create a new file handle if `output` is nil.
    var output: AVAudioFile?

    synthesizer.write(utterance) { [self] (buffer: AVAudioBuffer) in
        guard let pcmBuffer = buffer as? AVAudioPCMBuffer else {
            fatalError("unknown buffer type: \(buffer)")
        }
        if pcmBuffer.frameLength == 0 {
            // Done
        } else {
            do {
                // This closure is called multiple times, so create the file only once
                // to end up with the complete audio.
                if output == nil {
                    output = try AVAudioFile(
                        forWriting: recordingPath,
                        settings: pcmBuffer.format.settings,
                        commonFormat: .pcmFormatInt16,
                        interleaved: false)
                }
                try output?.write(from: pcmBuffer)
            } catch {
                print(error.localizedDescription)
            }
        }
    }
}
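One small addition you may want on top of that (my suggestion, not part of the demo): when the final zero-length buffer arrives, drop the file reference so the AVAudioFile is flushed and closed before you try to play it back:
if pcmBuffer.frameLength == 0 {
    // Final callback: releasing the AVAudioFile closes the file.
    output = nil
}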
BTW, I uploaded a GitHub demo here.
Finally, here is how to inspect the file contents on an iOS device:
Xcode Window menu -> Devices and Simulators, then download your app's container to copy out its contents.

Change BPM in real time with AVAudioEngine using Swift

Hello, I am trying to implement a simple audio app using AVAudioEngine that plays a short WAV file in a loop at some BPM, which can be changed in real time (by a slider or something).
Current solution logic:
1. set bpm = 60
2. create audioFile from sample.wav
3. calculate bufferSize: AVAudioFrameCount(audioFile.processingFormat.sampleRate * 60 / Double(bpm))
4. set bufferSize on audioBuffer
5. load audioFile into audioBuffer
6. schedule audioBuffer to play
This solution works, but the issue is that if I want to change the bpm I have to recreate the buffer with a different bufferSize, so the change does not happen in real time: I need to stop the player and reschedule a buffer with the new size.
Any thoughts on how this can be done?
Thanks in advance!
Code (main part):
var bpm: Float = 30
let engine = AVAudioEngine()
var player = AVAudioPlayerNode()
var audioBuffer: AVAudioPCMBuffer?
var audioFile: AVAudioFile?

override func viewDidLoad() {
    super.viewDidLoad()
    audioFile = loadfile(from: "sound.wav")
    audioBuffer = tickBuffer(audioFile: audioFile!)
    engine.attach(player)
    engine.connect(player, to: engine.mainMixerNode, format: audioFile?.processingFormat)
    do {
        engine.prepare()
        try engine.start()
    } catch {
        print(error)
    }
}

private func loadfile(from fileName: String) -> AVAudioFile? {
    let path = Bundle.main.path(forResource: fileName, ofType: nil)!
    let url = URL(fileURLWithPath: path)
    do {
        let audioFile = try AVAudioFile(forReading: url)
        return audioFile
    } catch {
        print("Error loading buffer1 \(error)")
    }
    return nil
}

func tickBuffer(audioFile: AVAudioFile) -> AVAudioPCMBuffer {
    let periodLength = AVAudioFrameCount(audioFile.processingFormat.sampleRate * 60 / Double(bpm))
    let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: periodLength)!
    try! audioFile.read(into: buffer)
    buffer.frameLength = periodLength
    return buffer
}

func play() {
    player.scheduleBuffer(audioBuffer!, at: nil, options: .loops, completionHandler: nil)
    player.play()
}

func stop() {
    player.stop()
}
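For reference, the rescheduling that the question describes would look roughly like this (my own sketch; setBPM is a hypothetical slider handler, and the audible gap caused by stop() plus reschedule is exactly the problem being asked about):
func setBPM(_ newBPM: Float) {
    bpm = newBPM
    // Rebuild the loop buffer for the new period and reschedule it.
    audioBuffer = tickBuffer(audioFile: audioFile!)
    player.stop()
    player.scheduleBuffer(audioBuffer!, at: nil, options: .loops, completionHandler: nil)
    player.play()
}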

AVAudioEngine MIDI file play (Current progress + MIDI end callback) Swift

I am playing MIDI using AVAudioEngine, AVAudioSequencer, and AVAudioUnitSampler.
AVAudioUnitSampler loads a SoundFont and AVAudioSequencer loads the MIDI file.
My initial configuration is:
engine = AVAudioEngine()
sampler = AVAudioUnitSampler()
speedControl = AVAudioUnitVarispeed()
pitchControl = AVAudioUnitTimePitch()
engine.attach(sampler)
engine.attach(pitchControl)
engine.attach(speedControl)
engine.connect(sampler, to: speedControl, format: nil)
engine.connect(speedControl, to: pitchControl, format: nil)
engine.connect(pitchControl, to: engine.mainMixerNode, format: nil)
Here is how my sequencer loads the MIDI file:
func setupSequencer() {
    self.sequencer = AVAudioSequencer(audioEngine: self.engine)
    let options = AVMusicSequenceLoadOptions()
    let documentsDirectoryURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!
    let introurl = URL(string: songDetails!.intro!)
    let midiFileURL = documentsDirectoryURL.appendingPathComponent(introurl!.lastPathComponent)
    do {
        try sequencer.load(from: midiFileURL, options: options)
        print("loaded \(midiFileURL)")
    } catch {
        print("something screwed up \(error)")
        return
    }
    sequencer.prepareToPlay()
    if sequencer.isPlaying == false {
    }
}
And here is how the sampler loads the SoundFont:
func loadSF2PresetIntoSampler(_ preset: UInt8, bankURL: URL) {
    do {
        try self.sampler.loadSoundBankInstrument(at: bankURL,
                                                 program: preset,
                                                 bankMSB: UInt8(kAUSampler_DefaultMelodicBankMSB),
                                                 bankLSB: UInt8(kAUSampler_DefaultBankLSB))
    } catch {
        print("error loading sound bank instrument")
    }
}
And it's playing fine, no issue with this. But I have two other requirements, and I am having problems with those:
1. I have to play another MIDI file after the first one finishes, so I need a completion/finished callback for the MIDI file from either the engine or the sequencer. Or, how can I load multiple MIDI files into the sequencer? I have tried many ways, but nothing helped.
2. I need to show the progress of MIDI playback, like current time and total time. For this I have tried a method found somewhere in Stack Overflow answers:
var currentPositionInSeconds: TimeInterval {
    get {
        guard let offsetTime = offsetTime else { return 0 }
        guard let lastRenderTime = engine.outputNode.lastRenderTime else { return 0 }
        let frames = lastRenderTime.sampleTime - offsetTime.sampleTime
        return Double(frames) / offsetTime.sampleRate
    }
}
Here offsetTime is:
offsetTime = engine.outputNode.lastRenderTime
and it always returns nil.
Glad to see someone using my example code.
It's a missing feature. Please file a Radar. Nothing will happen without a Radar.
They do pay attention to them when scheduling what to work on.
I fake it by getting the length of the sequence.
if let ft = sequencer.tracks.first {
    self.lengthInSeconds = ft.lengthInSeconds
} else {
    self.lengthInSeconds = 0
}
Then in my play function,
do {
    try sequencer.start()
    Timer.scheduledTimer(withTimeInterval: self.lengthInSeconds, repeats: false) {
        [weak self] (t: Timer) in
        guard let self = self else { return }
        t.invalidate()
        self.logger.debug("sequencer finished")
        self.sequencer.stop()
    }
    ...
You can use a DispatchSourceTimer if you want more accuracy.
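A DispatchSourceTimer version of the same idea could look like this (a sketch under the same assumptions as the Timer code above, i.e. lengthInSeconds has already been computed and self has a logger and sequencer):
var endTimer: DispatchSourceTimer?

func scheduleSequenceEndCallback() {
    let timer = DispatchSource.makeTimerSource(queue: .main)
    timer.schedule(deadline: .now() + self.lengthInSeconds, leeway: .milliseconds(1))
    timer.setEventHandler { [weak self] in
        guard let self = self else { return }
        self.logger.debug("sequencer finished")
        self.sequencer.stop()
        self.endTimer = nil
    }
    timer.resume()
    endTimer = timer   // keep a strong reference, or the source goes away immediately
}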

Playing Multiple WAV out Multiple Channels AVAudioEngine

I have 15 WAV files that I need to play back in sequence all on individual channels. I'm starting out trying to get two files working with a left / right stereo separation.
I'm creating an audio engine, a mixer, and two AVAudioPlayerNodes. The audio files are mono, and I'm trying to get the file from playerA to come out of the left channel and the file from playerB to come out of the right channel. What I'm having trouble understanding is how AudioUnitSetProperty works. It seems to relate to a single file only, and it seems there can only be one per audio unit. Is there a way I can associate a file with an audio unit? I can't seem to get at the audioUnit object associated with each track.
func testCode() {
    // get output hardware format
    let output = engine.outputNode
    let outputHWFormat = output.outputFormat(forBus: 0)

    // connect mixer to output
    let mixer = engine.mainMixerNode
    engine.connect(mixer, to: output, format: outputHWFormat)

    // then work on the player end by first attaching the players to the engine
    engine.attach(playerA)
    engine.attach(playerB)

    // find the audio files
    guard let audioFileURLA = Bundle.main.url(forResource: "test", withExtension: "wav") else {
        fatalError("audio file is not in bundle.")
    }
    guard let audioFileURLB = Bundle.main.url(forResource: "test2", withExtension: "wav") else {
        fatalError("audio file is not in bundle.")
    }

    var songFileA: AVAudioFile?
    do {
        songFileA = try AVAudioFile(forReading: audioFileURLA)
        print(songFileA!.processingFormat)
        // connect player to mixer
        engine.connect(playerA, to: mixer, format: songFileA!.processingFormat)
    } catch {
        fatalError("cannot create AVAudioFile \(error)")
    }

    let channelMap: [Int32] = [0, -1] // play channel in left
    let propSize: UInt32 = UInt32(channelMap.count) * UInt32(MemoryLayout<Int32>.size)
    print(propSize)

    let code: OSStatus = AudioUnitSetProperty((engine.inputNode?.audioUnit)!,
                                              kAudioOutputUnitProperty_ChannelMap,
                                              kAudioUnitScope_Global,
                                              1,
                                              channelMap,
                                              propSize)
    print(code)

    let channelMapB: [Int32] = [-1, 0] // play channel in right
    var songFileB: AVAudioFile?
    do {
        songFileB = try AVAudioFile(forReading: audioFileURLB)
        print(songFileB!.processingFormat)
        // connect player to mixer
        engine.connect(playerB, to: mixer, format: songFileB!.processingFormat)
    } catch {
        fatalError("cannot create AVAudioFile \(error)")
    }

    let codeB: OSStatus = AudioUnitSetProperty((engine.inputNode?.audioUnit)!,
                                               kAudioOutputUnitProperty_ChannelMap,
                                               kAudioUnitScope_Global,
                                               1,
                                               channelMapB,
                                               propSize)
    print(codeB)

    do {
        try engine.start()
    } catch {
        fatalError("Could not start engine. error: \(error).")
    }

    playerA.scheduleFile(songFileA!, at: nil) {
        print("done")
        self.playerA.play()
    }
    playerB.scheduleFile(songFileA!, at: nil) {
        print("done")
        self.playerB.play()
    }

    playerA.play()
    playerB.play()
    print(playerA.isPlaying)
}
engine.connect(mixer, to: output, format: outputHWFormat)
This isn't necessary; the mixer will be implicitly connected when accessed.
As for panning: AudioUnitSetProperty also isn't necessary. AVAudioPlayerNode conforms to AVAudioMixing, so since there is a mixer node downstream from the player, all you have to do is this:
playerA.pan = -1
playerB.pan = 1
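So a minimal two-player version of the setup above reduces to something like this sketch (resource names are placeholders; in a real app keep the engine and players alive as properties, as done here):
import AVFoundation

final class StereoPairPlayer {
    let engine = AVAudioEngine()
    let playerA = AVAudioPlayerNode()
    let playerB = AVAudioPlayerNode()

    // Plays two mono WAVs from the bundle, one hard left, one hard right.
    func play(leftResource: String, rightResource: String) throws {
        let fileA = try AVAudioFile(forReading: Bundle.main.url(forResource: leftResource, withExtension: "wav")!)
        let fileB = try AVAudioFile(forReading: Bundle.main.url(forResource: rightResource, withExtension: "wav")!)

        engine.attach(playerA)
        engine.attach(playerB)
        // Connecting to mainMixerNode is enough; its connection to the output is made implicitly.
        engine.connect(playerA, to: engine.mainMixerNode, format: fileA.processingFormat)
        engine.connect(playerB, to: engine.mainMixerNode, format: fileB.processingFormat)

        // Hard-pan the mono players left and right via AVAudioMixing.
        playerA.pan = -1
        playerB.pan = 1

        try engine.start()
        playerA.scheduleFile(fileA, at: nil)
        playerB.scheduleFile(fileB, at: nil)
        playerA.play()
        playerB.play()
    }
}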

Buffered input to SecTransform

I'm reading data from an audio file and computing a hash of it, as in the style of the ffmpeg MD5 muxer, except I'm doing SHA2 and using AVFoundation and the OS X SecTransform API.
What this does is open an audio file, convert it to its native PCM format if it's compressed, and then hash the interleaved samples byte by byte.
When I read the audio from my files, I would normally read the samples into a buffer in a for or while loop.
extension AVAudioFile {
    func sha2() throws -> NSData {
        let bufSize = AVAudioFrameCount(0x1000)
        let buffer = AVAudioPCMBuffer(PCMFormat: self.processingFormat,
                                      frameCapacity: bufSize)
        // initialize digest algo
        for (;;) {
            try readIntoBuffer(buffer)
            if buffer.frameLength > 0 {
                // read buffer into digest
            } else {
                break
            }
        }
        // finalize digest and return...
        return NSData()
    }
}
The issue I'm having is, the only way I can see of loading data into a SecTransform is either handing it all the data at once in a CFData, or as a CFReadStream. How do I feed my data buffer-by-buffer into a SecTransform?
I figured it out: you create bound input and output streams with NSStream.getBoundStreamsWithBufferSize() and then feed the output stream from an asynchronous loop.
The complete implementation is like this:
func writeSamplesFromBuffer(buffer: AVAudioPCMBuffer, toStream: NSOutputStream) {
    assert(buffer.format.interleaved == true)
    var rawBuffer = UnsafePointer<UInt8>(buffer.int32ChannelData.memory)
    var toWrite = sizeof(Int32) *
        Int(buffer.format.channelCount) * Int(buffer.frameLength)
    while toWrite > 0 {
        let written = toStream.write(rawBuffer, maxLength: toWrite)
        rawBuffer = rawBuffer.advancedBy(written)
        toWrite -= written
    }
}

func writeAudioDataFromURL(url: NSURL,
                           usingFormat format: AVAudioCommonFormat,
                           toStream: NSOutputStream) throws {
    let audioFile = try AVAudioFile(forReading: url,
                                    commonFormat: format,
                                    interleaved: true)
    let pcmBuffer = AVAudioPCMBuffer(PCMFormat: audioFile.processingFormat,
                                     frameCapacity: 0x1000)
    toStream.open()
    let writerQueue = dispatch_get_global_queue(QOS_CLASS_UTILITY, 0)
    dispatch_async(writerQueue) {
        while true {
            do {
                try audioFile.readIntoBuffer(pcmBuffer)
                if pcmBuffer.frameLength > 0 {
                    writeSamplesFromBuffer(pcmBuffer, toStream: toStream)
                } else {
                    break
                }
            } catch let error {
                fatalError("Fatal error: \(error) while reading audio file \(audioFile) at URL \(url)")
            }
        }
        toStream.close()
    }
}

func sha256DigestForStream(stream: NSInputStream) throws -> NSData {
    let transform = SecTransformCreateGroupTransform().takeRetainedValue()
    let readXform = SecTransformCreateReadTransformWithReadStream(stream as CFReadStreamRef).takeRetainedValue()
    var error: Unmanaged<CFErrorRef>? = nil
    let digestXform = SecDigestTransformCreate(kSecDigestSHA2, 256, &error).takeRetainedValue()
    SecTransformConnectTransforms(readXform, kSecTransformOutputAttributeName,
                                  digestXform, kSecTransformInputAttributeName,
                                  transform, &error)
    if let e = error { throw e.takeUnretainedValue() }
    if let output = SecTransformExecute(transform, &error) as? NSData {
        return output
    } else {
        throw error!.takeRetainedValue()
    }
}

func sha256DigestForAudioFile(url: NSURL,
                              convertedToSampleFormat sampleFormat: AVAudioCommonFormat) throws -> NSData {
    let streamBufSize = 0x1000
    var rs: NSInputStream? = nil
    var ws: NSOutputStream? = nil
    NSStream.getBoundStreamsWithBufferSize(streamBufSize,
                                           inputStream: &rs, outputStream: &ws)
    guard let readStream = rs, writeStream = ws else {
        fatalError("Failed to create file read streams")
    }
    try writeAudioDataFromURL(url,
                              usingFormat: sampleFormat, toStream: writeStream)
    return try sha256DigestForStream(readStream)
}