Get all sound frequencies of a WAV file using Swift and AVFoundation

I would like to capture all frequencies present within given time spans of a WAV file. The intent is to do some audio analysis in a later step. For testing, I've used the application "SoX" to generate a 1-second WAV file containing only a single tone at 13000 Hz. I want to read the file and find that frequency.
I'm using AVFoundation (which is important) to read the file. Since the input data is PCM, I need an FFT to get the actual frequencies, which I do using the Accelerate framework. However, I don't get the expected result (13000 Hz), but rather a lot of values I don't understand. I'm new to audio development, so any hint about where my code is failing is appreciated. The code includes comments at the points where the issues occur.
Thanks in advance!
Code:
import AVFoundation
import Accelerate
class Analyzer {
// This function is implemented using the code from the following tutorial:
// https://developer.apple.com/documentation/accelerate/vdsp/fast_fourier_transforms/finding_the_component_frequencies_in_a_composite_sine_wave
func fftTransform(signal: [Float], n: vDSP_Length) -> [Int] {
let observed: [DSPComplex] = stride(from: 0, to: Int(n), by: 2).map {
return DSPComplex(real: signal[$0],
imag: signal[$0.advanced(by: 1)])
}
let halfN = Int(n / 2)
var forwardInputReal = [Float](repeating: 0, count: halfN)
var forwardInputImag = [Float](repeating: 0, count: halfN)
var forwardInput = DSPSplitComplex(realp: &forwardInputReal,
imagp: &forwardInputImag)
vDSP_ctoz(observed, 2,
&forwardInput, 1,
vDSP_Length(halfN))
let log2n = vDSP_Length(log2(Float(n)))
guard let fftSetUp = vDSP_create_fftsetup(
log2n,
FFTRadix(kFFTRadix2)) else {
fatalError("Can't create FFT setup.")
}
defer {
vDSP_destroy_fftsetup(fftSetUp)
}
var forwardOutputReal = [Float](repeating: 0, count: halfN)
var forwardOutputImag = [Float](repeating: 0, count: halfN)
var forwardOutput = DSPSplitComplex(realp: &forwardOutputReal,
imagp: &forwardOutputImag)
vDSP_fft_zrop(fftSetUp,
&forwardInput, 1,
&forwardOutput, 1,
log2n,
FFTDirection(kFFTDirection_Forward))
let componentFrequencies = forwardOutputImag.enumerated().filter {
$0.element < -1
}.map {
return $0.offset
}
return componentFrequencies
}
func run() {
// The frequencies array is an array of frequencies which is then converted into points on sine curves (the signal)
let n = vDSP_Length(4*4096)
let frequencies: [Float] = [1, 5, 25, 30, 75, 100, 300, 500, 512, 1023]
let tau: Float = .pi * 2
let signal: [Float] = (0 ... n).map { index in
frequencies.reduce(0) { accumulator, frequency in
let normalizedIndex = Float(index) / Float(n)
return accumulator + sin(normalizedIndex * frequency * tau)
}
}
// The frequencies are then recovered from the signal using the fftTransform function above, giving exactly the same values as in the "frequencies" variable
let frequenciesRestored = fftTransform(signal: signal, n: n).map({Float($0)})
assert(frequenciesRestored == frequencies)
// Now I want to do the same thing, but reading the frequencies from a file (which includes a constant tone at 13000 Hz)
let file = { PATH TO A WAV-FILE WITH A SINGLE TONE AT 13000Hz RUNNING FOR 1 SECOND }
let asset = AVURLAsset(url: URL(fileURLWithPath: file))
let track = asset.tracks[0]
do {
let reader = try AVAssetReader(asset: asset)
let sampleRate = 48000.0
let outputSettingsDict: [String: Any] = [
AVFormatIDKey: kAudioFormatLinearPCM,
AVSampleRateKey: Int(sampleRate),
AVLinearPCMIsNonInterleaved: false,
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsBigEndianKey: false,
]
let output = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettingsDict)
output.alwaysCopiesSampleData = false
reader.add(output)
reader.startReading()
typealias audioBuffertType = Int16
autoreleasepool {
while (reader.status == .reading) {
if let sampleBuffer = output.copyNextSampleBuffer() {
var audioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer(mNumberChannels: 0, mDataByteSize: 0, mData: nil))
var blockBuffer: CMBlockBuffer?
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
bufferListSizeNeededOut: nil,
bufferListOut: &audioBufferList,
bufferListSize: MemoryLayout<AudioBufferList>.size,
blockBufferAllocator: nil,
blockBufferMemoryAllocator: nil,
flags: kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment,
blockBufferOut: &blockBuffer
);
let buffers = UnsafeBufferPointer<AudioBuffer>(start: &audioBufferList.mBuffers, count: Int(audioBufferList.mNumberBuffers))
for buffer in buffers {
let samplesCount = Int(buffer.mDataByteSize) / MemoryLayout<audioBuffertType>.size
let samplesPointer = audioBufferList.mBuffers.mData!.bindMemory(to: audioBuffertType.self, capacity: samplesCount)
let samples = UnsafeMutableBufferPointer<audioBuffertType>(start: samplesPointer, count: samplesCount)
let myValues: [Float] = samples.map {
let value = Float($0)
return value
}
// Here I would expect my array to include multiple occurrences of "13000", the frequency of the tone in my file.
// I'm not sure what the parameter 'n' does in this case, but changing it seems to change the result.
// Going by the Nyquist theorem I thought it should be twice the highest measurable frequency (13000),
// but this crashes the application:
let mySignals = fftTransform(signal: myValues, n: vDSP_Length(2 * 13000))
assert(mySignals[0] == 13000)
}
}
}
}
}
catch {
print("error!")
}
}
}
The test clip can be generated using:
sox -G -n -r 48000 ~/outputfile.wav synth 1.0 sine 13000
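A note on the bin-to-frequency mapping the comments above touch on: for a real signal of n samples captured at a given sample rate, FFT bin k corresponds to k * sampleRate / n Hz, so bin indices only coincide with frequencies in Hz when the analysed block effectively spans one second (which is what the synthetic test above does by normalising the index over n). Below is a minimal sketch of that mapping, with hypothetical numbers rather than anything taken from the question.
import Foundation
// Sketch (not part of the original code): map an FFT bin index to a
// frequency in Hz for a real signal of `n` samples captured at `sampleRate`.
// Only bins 0 ..< n/2 are meaningful (up to the Nyquist frequency).
func frequency(forBin bin: Int, sampleRate: Double, n: Int) -> Double {
    return Double(bin) * sampleRate / Double(n)
}
// Hypothetical numbers: with n = 4096 samples at 48 kHz, a 13000 Hz tone
// lands near bin 13000 * 4096 / 48000 ≈ 1109.
print(frequency(forBin: 1109, sampleRate: 48_000, n: 4096)) // ≈ 12996.1 Hz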

Related

I am trying to add an AVAudioMixerNode with custom formatting, but I keep crashing on installTap

I have a very simple app that taps the microphone and records the audio into multiple files. Working code at the very bottom.
However, the files are large, and I would like to change the sample rate of the microphone buffer, which on my current Mac is 44100.
If I understand correctly:
The formatting of the incoming audio can be different depending on hardware (i.e. bluetooth headset, etc.)
I can attach mixer nodes to the AVAudioEngine and change the audio formatting specific to a node.
So I make two changes:
After starting the AVAudioEngine, I add:
audioEngine.attach(downMixer)
audioEngine.connect(node, to: downMixer, format: format16KHzMono)
I change the installTap call to tap downMixer and use the lower-rate format
downMixer.installTap(onBus: 0, bufferSize: 8192, format: format16KHzMono, block:
However, the error I get on .installTap is:
"required condition is false: NULL != engine"
The only two things I have noticed are:
My user-defined format16KHzMono has only 7 keys rather than 8; the missing key is AVChannelLayoutKey. However, the crash occurs even if I switch to the old format.
The line audioEngine.attach(downMixer) gives me nine "throwing -10878" errors. According to other posts I have found, this "supposedly" can be ignored?
I can only assume I am connecting the mixer incorrectly, or at the wrong time, etc. If anyone can figure out what I am doing wrong I would appreciate it.
class MyAudio {
let audioEngine = AVAudioEngine()
var audioFile : AVAudioFile?
var node : AVAudioInputNode
var recordingFormat : AVAudioFormat
var downMixer = AVAudioMixerNode()
let format16KHzMono = AVAudioFormat.init(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: true)
init() {
node = audioEngine.inputNode
recordingFormat = node.inputFormat(forBus: 0)
let format16KHzMono = AVAudioFormat.init(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: true) }
func startRecording() {
audioBuffs = []
x = -1
node.installTap(onBus: 0, bufferSize: 8192, format: recordingFormat, block: {
[self]
(buffer, _) in
x += 1
audioBuffs.append(buffer)
if x >= 100 {
var cumSilence = 0
var topRange = 0
audioFile = makeFile(format: recordingFormat, index: fileCount)
fileCount += 1
for i in 50...100 {
let curVol = getVolume(from: audioBuffs[i], bufferSize: audioBuffs[i].frameLength)
if curVol < voxGate {
cumSilence += 1
} else if curVol >= voxGate && cumSilence < 4 {
cumSilence = 0
} else if curVol >= voxGate && cumSilence > 4 {
topRange = i
break
}
}
for i in 0...(topRange - Int(cumSilence/2)){
do {
try audioFile!.write(from: audioBuffs[i]);
} catch {
mainView?.setLabelText(tag: 4, text: "write error")
stopRecording()
}
}
for _ in 0...(topRange - Int(cumSilence/2)){
audioBuffs.remove(at: 0) }
x = 0
}
})
audioEngine.prepare()
do {
try audioEngine.start()
} catch let error { print ("oh catch \(error)") }
}
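As far as I can tell, the error "required condition is false: NULL != engine" is usually raised when installTap is called on a node that is not yet attached to an engine, so one thing worth checking is the attach/connect/tap ordering. Below is a minimal sketch of one ordering under that assumption; it mirrors the names from the question but uses a Float32 tap format, since mixer nodes work in float internally, and it is a sketch rather than a confirmed fix.
import AVFoundation
let audioEngine = AVAudioEngine()
let downMixer = AVAudioMixerNode()
let inputNode = audioEngine.inputNode
let inputFormat = inputNode.outputFormat(forBus: 0)
// Assumed target format; force-unwrapped only for brevity in this sketch.
let format16KHzMono = AVAudioFormat(commonFormat: .pcmFormatFloat32,
                                    sampleRate: 16000,
                                    channels: 1,
                                    interleaved: false)!
// Attach and connect the down-mixer before touching installTap.
audioEngine.attach(downMixer)
audioEngine.connect(inputNode, to: downMixer, format: inputFormat)
// Let the mixer do the rate/channel conversion by giving the outgoing
// connection the reduced format; keep it silent in the output mix.
audioEngine.connect(downMixer, to: audioEngine.mainMixerNode, format: format16KHzMono)
downMixer.outputVolume = 0
// Install the tap only after the mixer is attached and connected.
downMixer.installTap(onBus: 0, bufferSize: 8192, format: format16KHzMono) { buffer, _ in
    // buffer is 16 kHz mono Float32 here; append it to a file, etc.
}
audioEngine.prepare()
do {
    try audioEngine.start()
} catch {
    print("engine start failed: \(error)")
}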

Swift, macOS, mac with 2 GPUs, matrix operations work on one GPU, not on the other

I'm running macOS on my MacBook Pro (Retina, 15-inch, Mid 2015), which has two GPUs in it, according to "About this Mac" in the Apple menu. One GPU is an AMD Radeon R9 M370X 2 GB, the other is an Intel Iris Pro 1536 MB -- the standard chips, I guess? They're the chips that were in there when I bought it, nothing I added myself.
I'm using the Swift MPS library for matrix computations; it works great on the Intel GPU, but when I select the Radeon, I only ever get back zeros from every operation, with no error reported. I've looked around for documentation on it, but I can't find anything. The only clue I have so far is that the Radeon reports "not integrated" (or at least, I think it does, based on the sample code at Finding GPUs on macOS, which is about as useful as Apple's doc ever is, meaning not very). If I've read that page correctly, this is what my two GPUs are telling me.
Device Intel Iris Pro Graphics; caps: headful, not discrete, integrated, not external
Device AMD Radeon R9 M370X; caps: headful, discrete, not integrated, not external
I can't find any doc that would suggest what I'm doing wrong. I've been all over Apple's MPS documentation, to no avail. And as I say, the code works great on the Intel GPU, so I should think it would run on the Radeon too. I've run some downloadable diagnostic tools to check on the Radeon, but it doesn't show up in the menus of those tools. So I don't even know whether this is something I'm doing wrong in the code, or if the chip itself is broken.
Below is the code, which you can build as a console app by pasting into main.swift. Find the following line:
let device = MTLCopyAllDevices()[1]
I use [0] for the Intel, [1] for the Radeon, and you can see that the output is different, that is, all zeros for the Radeon. I suppose your mileage may vary depending on your machine. I welcome any input, cheers
import MetalPerformanceShaders
typealias MPSNumber = Float32
let MPSNumberSize = MemoryLayout<MPSNumber>.size
let MPSNumberTypeInGPU = MPSDataType.float32
class MPSNet {
let commandBuffer: MTLCommandBuffer
let commandQueue: MTLCommandQueue
let device = MTLCopyAllDevices()[1]
var neuronsInMatrix1: MPSMatrix?
var neuronsInMatrix2: MPSMatrix?
var neuronsOutMatrix: MPSMatrix?
init() {
guard let cq = device.makeCommandQueue() else { fatalError() }
guard let cb = cq.makeCommandBuffer() else { fatalError() }
commandQueue = cq
commandBuffer = cb
let cMatrices = 2
let cRows = 1
let cColumns = 3
let sensoryInputs1: [MPSNumber] = [1, 2, 3]
let sensoryInputs2: [MPSNumber] = [4, 5, 6]
neuronsInMatrix1 = makeMatrix(device, sensoryInputs1)
neuronsInMatrix2 = makeMatrix(device, sensoryInputs2)
let rowStride = MPSMatrixDescriptor.rowBytes(fromColumns: cColumns, dataType: MPSNumberTypeInGPU)
neuronsOutMatrix = makeMatrix(device, cRows, cColumnsOut: cColumns, rowStride: rowStride)
let adder = MPSMatrixSum(
device: device, count: cMatrices, rows: cRows, columns: cColumns, transpose: false
)
adder.encode(
to: commandBuffer,
sourceMatrices: [neuronsInMatrix1!, neuronsInMatrix2!],
resultMatrix: neuronsOutMatrix!, scale: nil, offsetVector: nil,
biasVector: nil, start: 0
)
commandBuffer.addCompletedHandler { _ in
let motorOutputs = self.getComputeOutput(self.neuronsOutMatrix!)
let discrete = !self.device.isLowPower && !self.device.isRemovable
let caps = "\(self.device.isHeadless ? " headless" : " headful")" +
"\(discrete ? ", discrete" : ", not discrete")" +
"\(self.device.isLowPower ? ", integrated" : ", not integrated")" +
"\(self.device.isRemovable ? ", external" : ", not external")"
print("Device \(self.device.name); caps:\(caps); motor outputs \(motorOutputs)")
}
}
func compute() {
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
}
}
extension MPSNet {
func getComputeOutput(_ matrix: MPSMatrix) -> [Double] {
let rc = matrix.data.contents()
return stride(from: 0, to: matrix.columns * MPSNumberSize, by: MPSNumberSize).map {
offset in
let rr = rc.load(fromByteOffset: offset, as: MPSNumber.self)
return Double(rr)
}
}
func loadMatrix(_ data: MTLBuffer, _ rawValues: [MPSNumber]) {
let dContents = data.contents()
zip(stride(from: 0, to: rawValues.count * MPSNumberSize, by: MPSNumberSize), rawValues).forEach { z in
let (byteOffset, rawValue) = (z.0, MPSNumber(z.1))
dContents.storeBytes(of: rawValue, toByteOffset: byteOffset, as: MPSNumber.self)
}
}
func makeMatrix(_ device: MTLDevice, _ rawValues: [MPSNumber]) -> MPSMatrix {
let rowStride = MPSMatrixDescriptor.rowBytes(
fromColumns: rawValues.count, dataType: MPSNumberTypeInGPU
)
let descriptor = MPSMatrixDescriptor(
dimensions: 1, columns: rawValues.count, rowBytes: rowStride,
dataType: MPSNumberTypeInGPU
)
guard let inputBuffer = device.makeBuffer(
length: descriptor.matrixBytes, options: MTLResourceOptions.storageModeManaged
) else { fatalError() }
loadMatrix(inputBuffer, rawValues)
return MPSMatrix(buffer: inputBuffer, descriptor: descriptor)
}
func makeMatrix(_ device: MTLDevice, _ cRowsOut: Int, cColumnsOut: Int, rowStride: Int) -> MPSMatrix {
let matrixDescriptor = MPSMatrixDescriptor(
dimensions: cRowsOut, columns: cColumnsOut,
rowBytes: rowStride, dataType: MPSNumberTypeInGPU
)
return MPSMatrix(device: device, descriptor: matrixDescriptor)
}
}
let net = MPSNet()
net.compute()
It appears you have failed to use -[MPSMatrix synchronizeOnCommandBuffer:]. On discrete devices, some explicit synchronization is required before the data will come back from the GPU.
The problem lies in the storage mode of your matrix buffers. You're using MTLResourceOptions.storageModeManaged, which tells Metal that you want to manage synchronization of the memory being shared between the CPU and GPU. As mentioned in another answer here, you must use MPSMatrix.synchronize(on: MTLCommandBuffer) after a GPU operation before you try to read the data with the CPU. But you must also synchronize in the other direction, i.e. after CPU operations and before you commit the command to the GPU, using MTLBuffer.didModifyRange(_: Range).
Alternatively, you can use the shared storage mode, MTLResourceOptions.storageModeShared, which takes care of the synchronization for you.
See Synchronizing a Managed Resource in the Apple doc for details.
Below is a working version of your example using the managed storage mode, as you have. Notice the differences in the function MPSNet.compute(). You can leave that stuff out and just change the storage mode when creating the MTLBuffers for your matrices if your app can use the shared storage mode.
import MetalPerformanceShaders
typealias MPSNumber = Float32
let MPSNumberSize = MemoryLayout<MPSNumber>.size
let MPSNumberTypeInGPU = MPSDataType.float32
class MPSNet {
let commandBuffer: MTLCommandBuffer
let commandQueue: MTLCommandQueue
let device = MTLCopyAllDevices()[1]
var neuronsInMatrix1: MPSMatrix?
var neuronsInMatrix2: MPSMatrix?
var neuronsOutMatrix: MPSMatrix?
init() {
guard let cq = device.makeCommandQueue() else { fatalError() }
guard let cb = cq.makeCommandBuffer() else { fatalError() }
commandQueue = cq
commandBuffer = cb
let cMatrices = 2
let cRows = 1
let cColumns = 3
let sensoryInputs1: [MPSNumber] = [1, 2, 3]
let sensoryInputs2: [MPSNumber] = [4, 5, 6]
neuronsInMatrix1 = makeMatrix(device, sensoryInputs1)
neuronsInMatrix2 = makeMatrix(device, sensoryInputs2)
let rowStride = MPSMatrixDescriptor.rowBytes(fromColumns: cColumns, dataType: MPSNumberTypeInGPU)
neuronsOutMatrix = makeMatrix(device, cRows, cColumnsOut: cColumns, rowStride: rowStride)
let adder = MPSMatrixSum(
device: device, count: cMatrices, rows: cRows, columns: cColumns, transpose: false
)
adder.encode(
to: commandBuffer,
sourceMatrices: [neuronsInMatrix1!, neuronsInMatrix2!],
resultMatrix: neuronsOutMatrix!, scale: nil, offsetVector: nil,
biasVector: nil, start: 0
)
commandBuffer.addCompletedHandler { _ in
let motorOutputs = self.getComputeOutput(self.neuronsOutMatrix!)
let discrete = !self.device.isLowPower && !self.device.isRemovable
let caps = "\(self.device.isHeadless ? " headless" : " headful")" +
"\(discrete ? ", discrete" : ", not discrete")" +
"\(self.device.isLowPower ? ", integrated" : ", not integrated")" +
"\(self.device.isRemovable ? ", external" : ", not external")"
print("Device \(self.device.name); caps:\(caps); motor outputs \(motorOutputs)")
}
}
func compute() {
for matrix in [neuronsInMatrix1!, neuronsInMatrix2!, neuronsOutMatrix!] {
let matrixData = matrix.data
matrixData.didModifyRange(0..<matrixData.length)
matrix.synchronize(on: commandBuffer)
}
commandBuffer.commit()
}
}
extension MPSNet {
func getComputeOutput(_ matrix: MPSMatrix) -> [Double] {
let rc = matrix.data.contents()
return stride(from: 0, to: matrix.columns * MPSNumberSize, by: MPSNumberSize).map {
offset in
let rr = rc.load(fromByteOffset: offset, as: MPSNumber.self)
return Double(rr)
}
}
func loadMatrix(_ data: MTLBuffer, _ rawValues: [MPSNumber]) {
let dContents = data.contents()
zip(stride(from: 0, to: rawValues.count * MPSNumberSize, by: MPSNumberSize), rawValues).forEach { z in
let (byteOffset, rawValue) = (z.0, MPSNumber(z.1))
dContents.storeBytes(of: rawValue, toByteOffset: byteOffset, as: MPSNumber.self)
}
}
func makeMatrix(_ device: MTLDevice, _ rawValues: [MPSNumber]) -> MPSMatrix {
let rowStride = MPSMatrixDescriptor.rowBytes(
fromColumns: rawValues.count, dataType: MPSNumberTypeInGPU
)
let descriptor = MPSMatrixDescriptor(
dimensions: 1, columns: rawValues.count, rowBytes: rowStride,
dataType: MPSNumberTypeInGPU
)
guard let inputBuffer = device.makeBuffer(
length: descriptor.matrixBytes, options: MTLResourceOptions.storageModeManaged
) else { fatalError() }
loadMatrix(inputBuffer, rawValues)
return MPSMatrix(buffer: inputBuffer, descriptor: descriptor)
}
func makeMatrix(_ device: MTLDevice, _ cRowsOut: Int, cColumnsOut: Int, rowStride: Int) -> MPSMatrix {
let matrixDescriptor = MPSMatrixDescriptor(
dimensions: cRowsOut, columns: cColumnsOut,
rowBytes: rowStride, dataType: MPSNumberTypeInGPU
)
return MPSMatrix(device: device, descriptor: matrixDescriptor)
}
}
let net = MPSNet()
net.compute()
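For completeness, here is a sketch of the shared-storage alternative mentioned above. This is a variation rather than part of the answer's tested code: it reuses the loadMatrix helper and the type aliases from the example, changes only the buffer's storage mode, and would let you drop the didModifyRange/synchronize calls in compute().
// Sketch: create the matrix buffer with shared storage instead of managed,
// so Metal keeps the CPU and GPU views of the memory coherent for you.
func makeSharedMatrix(_ device: MTLDevice, _ rawValues: [MPSNumber]) -> MPSMatrix {
    let rowStride = MPSMatrixDescriptor.rowBytes(
        fromColumns: rawValues.count, dataType: MPSNumberTypeInGPU
    )
    let descriptor = MPSMatrixDescriptor(
        dimensions: 1, columns: rawValues.count, rowBytes: rowStride,
        dataType: MPSNumberTypeInGPU
    )
    guard let inputBuffer = device.makeBuffer(
        length: descriptor.matrixBytes, options: .storageModeShared
    ) else { fatalError() }
    loadMatrix(inputBuffer, rawValues)
    return MPSMatrix(buffer: inputBuffer, descriptor: descriptor)
}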

swift AVAudioEngine convert multichannel non interleaved signal to single channel

I am using AVAudioEngine to take a measurement. I play a stimulus sound out of my interface, and use a micTap to record the returned signal.
I am now looking at different Audio Interfaces which support a multitude of different formats. I am converting the input format of the inputNode via a mixer for two different reasons:
to downsample from the interfaces' preferred sampleRate to the sampleRate at which my app is working
to convert the incoming channel count to a single mono channel
I try this; however, it does not always seem to work as expected. If my interface is running at 96k and my app is running at 48k, doing a format change via a mixer ends up with a broken-up signal: it looks like it is only getting one side of a stereo interleaved channel. Below is my audioEngine code:
func initializeEngine(inputSweep:SweepFilter) {
buf1current = 0
buf2current = 0
in1StartTime = 0
in2startTime = 0
in1firstRun = true
in2firstRun = true
in1Buf = Array(repeating:0, count:1000000)
in2Buf = Array(repeating:0, count:1000000)
engine.stop()
engine.reset()
engine = AVAudioEngine()
numberOfSamples = 0
var time:Int = 0
do {
try AVAudioSession.sharedInstance().setCategory(.playAndRecord)
try AVAudioSession.sharedInstance()
.setPreferredSampleRate(Double(sampleRate))
} catch {
assertionFailure("AVAudioSession setup failed")
}
let format = engine.outputNode.inputFormat(forBus: 0)
let stimulusFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
sampleRate: Double(sampleRate),
channels: 1,
interleaved: false)
let outputFormat = engine.outputNode.inputFormat(forBus: 0)
let inputFormat = engine.inputNode.outputFormat(forBus: 0)
let srcNode = AVAudioSourceNode { _, timeStamp, frameCount, AudioBufferList -> OSStatus in
let ablPointer = UnsafeMutableAudioBufferListPointer(AudioBufferList)
if self.in2firstRun == true {
let start2 = CACurrentMediaTime()
self.in2startTime = Double(CACurrentMediaTime())
self.in2firstRun = false
}
if Int(frameCount) + time >= inputSweep.stimulus.count{
self.running = false
print("AUDIO ENGINE STOPPED")
}
if (Int(frameCount) + time) <= inputSweep.stimulus.count {
for frame in 0..<Int(frameCount) {
let value = inputSweep.stimulus[frame + time] * Float(outputVolume)
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = value
}
}
time += Int(frameCount)
} else {
for frame in 0..<Int(frameCount) {
let value = 0
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = Float(value)
}
}
}
return noErr
}
engine.attach(srcNode)
engine.connect(srcNode, to: engine.mainMixerNode, format: stimulusFormat)
engine.connect(engine.mainMixerNode, to: engine.outputNode, format: format)
let requiredFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
sampleRate: Double(sampleRate),
channels: 1,
interleaved: false)
let formatMixer = AVAudioMixerNode()
engine.attach(formatMixer)
engine.connect(engine.inputNode, to: formatMixer, format: inputFormat)
let MicSinkNode = AVAudioSinkNode() { (timeStamp, frames, audioBufferList) ->
OSStatus in
if self.in1firstRun == true {
let start1 = CACurrentMediaTime()
self.in1StartTime = Double(start1)
self.in1firstRun = false
}
let ptr = audioBufferList.pointee.mBuffers.mData?.assumingMemoryBound(to: Float.self)
var monoSamples = [Float]()
monoSamples.append(contentsOf: UnsafeBufferPointer(start: ptr, count: Int(frames)))
if self.buf1current >= 100000 {
self.running = false
}
for frame in 0..<frames {
self.in1Buf[self.buf1current + Int(frame)] = monoSamples[Int(frame)]
}
self.buf1current = self.buf1current + Int(frames)
return noErr
}
engine.attach(MicSinkNode)
engine.connect(formatMixer, to: MicSinkNode, format: requiredFormat)
engine.prepare()
assert(engine.inputNode != nil)
running = true
try! engine.start()
}
My sourceNode is an array of Floats synthesised to use the stimulusFormat. If I listen to this audioEngine with my interface at 96k, the output stimulus sounds completely clean; however, this broken-up signal is what is coming from the micTap. Physically, the output of the interface is routed directly to the input, so it is not going through any other device.
Further to this, I have the following function, which records my arrays to WAV files so that I can visually inspect in a DAW.
func writetoFile(buff:[Float], name:String){
let SAMPLE_RATE = sampleRate
let outputFormatSettings = [
AVFormatIDKey:kAudioFormatLinearPCM,
AVLinearPCMBitDepthKey:32,
AVLinearPCMIsFloatKey: true,
AVLinearPCMIsBigEndianKey: true,
AVSampleRateKey: SAMPLE_RATE,
AVNumberOfChannelsKey: 1
] as [String : Any]
let fileName = name
let DocumentDirURL = try! FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
let url = DocumentDirURL.appendingPathComponent(fileName).appendingPathExtension("wav")
//print("FilePath: \(url.path)")
let audioFile = try? AVAudioFile(forWriting: url, settings: outputFormatSettings, commonFormat: AVAudioCommonFormat.pcmFormatFloat32, interleaved: false)
let bufferFormat = AVAudioFormat(settings: outputFormatSettings)
let outputBuffer = AVAudioPCMBuffer(pcmFormat: bufferFormat!, frameCapacity: AVAudioFrameCount(buff.count))
for i in 0..<buff.count {
outputBuffer?.floatChannelData!.pointee[i] = Float(( buff[i] ))
}
outputBuffer!.frameLength = AVAudioFrameCount( buff.count )
do{
try audioFile?.write(from: outputBuffer!)
} catch let error as NSError {
print("error:", error.localizedDescription)
}
}
If I set my interface to 48k and my app is also working at 48k, and I inspect my reference signal against my measurement signal, the measured signal is clearly a lot longer than the original stimulus.
The physical file size is the same, as it is initialised as an empty array of fixed size, but at some point during the format conversion something goes wrong.
If I put my interface at 44.1k while my app runs at 48k, I can see regular 'glitches' in the audio, so the format conversion here is not working as it should.
Can anyone see anything obviously wrong?
Put the non-interleaved option AVLinearPCMIsNonInterleaved inside the format settings:
let outputFormatSettings = [
AVLinearPCMIsNonInterleaved: 0,
AVFormatIDKey:kAudioFormatLinearPCM,
AVLinearPCMBitDepthKey:32,
AVLinearPCMIsFloatKey: true,
AVLinearPCMIsBigEndianKey: true,
AVSampleRateKey: SAMPLE_RATE,
AVNumberOfChannelsKey: 1
] as [String : Any]
it worked for me, let me know
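If it helps, here is a small additional check (a sketch, not part of the answer): you can construct an AVAudioFormat from the settings dictionary before creating the AVAudioFile and print it to confirm the sample rate, channel count and interleaving you actually end up with.
// Sketch: sanity-check the format the settings dictionary describes.
if let checkFormat = AVAudioFormat(settings: outputFormatSettings) {
    print(checkFormat.sampleRate, checkFormat.channelCount, checkFormat.isInterleaved)
}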

How to convert Data of Int16 audio samples to array of float audio samples

I'm currently working with audio samples.
I get them from AVAssetReader and have a CMSampleBuffer with something like this:
guard let sampleBuffer = readerOutput.copyNextSampleBuffer() else {
guard reader.status == .completed else { return nil }
// Completed
// samples is an array of Int16
let samples = sampleData.withUnsafeBytes {
Array(UnsafeBufferPointer<Int16>(
start: $0, count: sampleData.count / MemoryLayout<Int16>.size))
}
// The only way I found to convert [Int16] -> [Float]...
return samples.map { Float($0) / Float(Int16.max)}
}
guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
return nil
}
let length = CMBlockBufferGetDataLength(blockBuffer)
let sampleBytes = UnsafeMutablePointer<UInt8>.allocate(capacity: length)
CMBlockBufferCopyDataBytes(blockBuffer, 0, length, sampleBytes)
sampleData.append(sampleBytes, count: length)
}
As you can see, the only way I found to convert [Int16] -> [Float] is samples.map { Float($0) / Float(Int16.max) }, but doing this increases my processing time. Is there another way to cast a pointer of Int16 to a pointer of Float?
"Casting" or "rebinding" a pointer only changes the way how memory is
interpreted. You want to compute floating point values from integers,
the new values have a different memory representation (and also a different
size).
Therefore you somehow have to iterate over all input values
and compute the new values. What you can do is to omit the Array
creation:
let samples = sampleData.withUnsafeBytes {
UnsafeBufferPointer<Int16>(start: $0, count: sampleData.count / MemoryLayout<Int16>.size)
}
return samples.map { Float($0) / Float(Int16.max) }
Another option would be to use the vDSP functions from the
Accelerate framework:
import Accelerate
// ...
let numSamples = sampleData.count / MemoryLayout<Int16>.size
var factor = Float(Int16.max)
var floats: [Float] = Array(repeating: 0.0, count: numSamples)
// Int16 array to Float array:
sampleData.withUnsafeBytes {
vDSP_vflt16($0, 1, &floats, 1, vDSP_Length(numSamples))
}
// Scaling:
vDSP_vsdiv(&floats, 1, &factor, &floats, 1, vDSP_Length(numSamples))
I don't know if that is faster, you'll have to check.
(Update: It is faster, as ColGraff demonstrated in his answer.)
An explicit loop is also much faster than using map:
let factor = Float(Int16.max)
let samples = sampleData.withUnsafeBytes {
UnsafeBufferPointer<Int16>(start: $0, count: sampleData.count / MemoryLayout<Int16>.size)
}
var floats: [Float] = Array(repeating: 0.0, count: samples.count)
for i in 0..<samples.count {
floats[i] = Float(samples[i]) / factor
}
return floats
An additional option in your case might be to use CMBlockBufferGetDataPointer() instead of CMBlockBufferCopyDataBytes()
into allocated memory.
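As a sketch of that last suggestion (an illustration only: blockBuffer is the buffer from the question's code, the labeled signature is the current Swift rendering of CMBlockBufferGetDataPointer, and it assumes the block buffer is contiguous):
import CoreMedia
// Sketch: read the samples in place instead of copying them into
// separately allocated memory.
var lengthAtOffset = 0
var totalLength = 0
var dataPointer: UnsafeMutablePointer<CChar>?
let status = CMBlockBufferGetDataPointer(blockBuffer,
                                         atOffset: 0,
                                         lengthAtOffsetOut: &lengthAtOffset,
                                         totalLengthOut: &totalLength,
                                         dataPointerOut: &dataPointer)
if status == kCMBlockBufferNoErr, let dataPointer = dataPointer {
    // Reinterpret the raw bytes as Int16 samples without an intermediate copy.
    let sampleCount = totalLength / MemoryLayout<Int16>.size
    dataPointer.withMemoryRebound(to: Int16.self, capacity: sampleCount) { int16Pointer in
        let samples = UnsafeBufferPointer(start: int16Pointer, count: sampleCount)
        // ... convert with vDSP_vflt16 / vDSP_vsdiv as shown above ...
        _ = samples
    }
}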
You can do considerably better if you use the Accelerate Framework for the conversion:
import Accelerate
// Set up a random [Int16]
var randomInt = [Int16]()
randomInt.reserveCapacity(10000)
for _ in 0..<randomInt.capacity {
let value = Int16(Int32(arc4random_uniform(UInt32(UInt16.max))) - Int32(UInt16.max / 2))
randomInt.append(value)
}
// Time elapsed helper: https://stackoverflow.com/a/25022722/887210
func printTimeElapsedWhenRunningCode(title:String, operation:()->()) {
let startTime = CFAbsoluteTimeGetCurrent()
operation()
let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
print("Time elapsed for \(title): \(timeElapsed) s.")
}
// Testing
printTimeElapsedWhenRunningCode(title: "vDSP") {
var randomFloat = [Float](repeating: 0, count: randomInt.capacity)
vDSP_vflt16(randomInt, 1, &randomFloat, 1, vDSP_Length(randomInt.capacity))
}
printTimeElapsedWhenRunningCode(title: "map") {
randomInt.map { Float($0) }
}
// Results
//
// Time elapsed for vDSP : 0.000429034233093262 s.
// Time elapsed for flatMap: 0.00233501195907593 s.
It's an improvement of about 5 times faster.
(Edit: Added some changes suggested by Martin R)
@MartinR and @ColGraff gave really good answers; thank you everybody for the fast replies.
However, I found an easier way to do it without any conversion at all. AVAssetReaderAudioMixOutput requires an audio settings dictionary, and inside it we can set the key AVLinearPCMIsFloatKey: true. This way I can read my data like this:
let samples = sampleData.withUnsafeBytes {
UnsafeBufferPointer<Float>(start: $0,
count: sampleData.count / MemoryLayout<Float>.size)
}
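For context, here is a sketch of the kind of reader output settings this relies on; the exact dictionary is an assumption (the answer above only names AVLinearPCMIsFloatKey), and asset stands for the AVAsset already being read.
import AVFoundation
// Sketch: ask the asset reader to deliver 32-bit float PCM directly,
// so no Int16 -> Float conversion step is needed afterwards.
let floatOutputSettings: [String: Any] = [
    AVFormatIDKey: kAudioFormatLinearPCM,
    AVLinearPCMIsFloatKey: true,
    AVLinearPCMBitDepthKey: 32,
    AVLinearPCMIsBigEndianKey: false,
    AVLinearPCMIsNonInterleaved: false
]
let readerOutput = AVAssetReaderAudioMixOutput(audioTracks: asset.tracks(withMediaType: .audio),
                                               audioSettings: floatOutputSettings)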
for: Xcode 8.3.3 • Swift 3.1
extension Collection where Iterator.Element == Int16 {
var floatArray: [Float] {
return flatMap{ Float($0) }
}
}
usage:
let int16Array: [Int16] = [1, 2, 3 ,4]
let floatArray = int16Array.floatArray

how to stream mp3 urls (iOS) in Swift?

Currently the code below can stream local mp3 files, so if I call
audio.scheduleFile(NSBundle.mainBundle().URLForResource("Moon River", withExtension: "mp3")!)
it will properly play the local file. Now I want to be able to stream non-local URLs.
What do I need to do in order to stream mp3 URLs?
class Audio: NSObject {
var graph: AUGraph
var filePlayerAU: AudioUnit
var filePlayerNode: AUNode
var outputAU: AudioUnit
var fileID: AudioFileID
var currentFrame: Int64
override init () {
graph = AUGraph()
filePlayerAU = AudioUnit()
filePlayerNode = AUNode()
outputAU = AudioUnit()
fileID = AudioFileID()
currentFrame = 0
super.init()
NewAUGraph(&graph)
// Add file player node
var cd = AudioComponentDescription(componentType: OSType(kAudioUnitType_Generator),
componentSubType: OSType(kAudioUnitSubType_AudioFilePlayer),
componentManufacturer: OSType(kAudioUnitManufacturer_Apple),
componentFlags: 0, componentFlagsMask: 0)
AUGraphAddNode(graph, &cd, &filePlayerNode)
// Add output node
var outputNode = AUNode()
cd.componentType = OSType(kAudioUnitType_Output)
cd.componentSubType = OSType(kAudioUnitSubType_RemoteIO)
AUGraphAddNode(graph, &cd, &outputNode)
// Graph must be opened before we can get node info!
AUGraphOpen(graph)
AUGraphNodeInfo(graph, filePlayerNode, nil, &filePlayerAU)
AUGraphNodeInfo(graph, outputNode, nil, &outputAU)
AUGraphConnectNodeInput(graph, filePlayerNode, 0, outputNode, 0)
AUGraphInitialize(graph)
registerCallbackForAU(filePlayerAU, nil)
}
func scheduleFile(url: NSURL) {
AudioFileOpenURL(url, 1, 0, &fileID)
// Step 1: schedule the file(s)
// kAudioUnitProperty_ScheduledFileIDs takes an array of AudioFileIDs
var filesToSchedule = [fileID]
AudioUnitSetProperty(filePlayerAU,
AudioUnitPropertyID(kAudioUnitProperty_ScheduledFileIDs),
AudioUnitScope(kAudioUnitScope_Global), 0, filesToSchedule,
UInt32(sizeof(AudioFileID)))
}
func scheduleRegion() {
// Step 2: Schedule the regions of the file(s) to play
// Swift forces us to fill out the structs completely, even if they are not used
let smpteTime = SMPTETime(mSubframes: 0, mSubframeDivisor: 0,
mCounter: 0, mType: 0, mFlags: 0,
mHours: 0, mMinutes: 0, mSeconds: 0, mFrames: 0)
var timeStamp = AudioTimeStamp(mSampleTime: 0, mHostTime: 0, mRateScalar: 0,
mWordClockTime: 0, mSMPTETime: smpteTime,
mFlags: UInt32(kAudioTimeStampSampleTimeValid), mReserved: 0)
var region = ScheduledAudioFileRegion()
region.mTimeStamp = timeStamp
region.mCompletionProc = nil
region.mCompletionProcUserData = nil
region.mAudioFile = fileID
region.mLoopCount = 0
region.mStartFrame = currentFrame
region.mFramesToPlay = UInt32.max
AudioUnitSetProperty(filePlayerAU,
AudioUnitPropertyID(kAudioUnitProperty_ScheduledFileRegion),
AudioUnitScope(kAudioUnitScope_Global), 0, &region,
UInt32(sizeof(ScheduledAudioFileRegion)))
// Step 3: Prime the file player
var primeFrames: UInt32 = 0
AudioUnitSetProperty(filePlayerAU,
AudioUnitPropertyID(kAudioUnitProperty_ScheduledFilePrime),
AudioUnitScope(kAudioUnitScope_Global), 0, &primeFrames,
UInt32(sizeof(UInt32)))
// Step 4: Schedule the start time (-1 = now)
timeStamp.mSampleTime = -1
AudioUnitSetProperty(filePlayerAU,
AudioUnitPropertyID(kAudioUnitProperty_ScheduleStartTimeStamp),
AudioUnitScope(kAudioUnitScope_Global), 0, &timeStamp,
UInt32(sizeof(AudioTimeStamp)))
}
}
If you are looking to play remote files (mp3) from a server, I would look into using AVPlayer.
https://developer.apple.com/documentation/avfoundation/avplayer
Here is a snippet that might nudge you in the right direction. Of course this is just a very basic example.
func playRemoteFile() {
let fileUrl = "http://www.foo.com/bar.mp3"
let url = NSURL(string: fileUrl)
var myPlayer = AVPlayer(URL: url)
myPlayer.play()
}
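For reference, the same idea in current Swift syntax (a sketch; the URL is the hypothetical one from the answer, and in a real app you keep a strong reference to the player so it is not deallocated while playing):
import AVFoundation
final class RemotePlayer {
    // Keep the player alive for as long as playback should continue.
    private var player: AVPlayer?
    func playRemoteFile() {
        guard let url = URL(string: "http://www.foo.com/bar.mp3") else { return }
        player = AVPlayer(url: url)
        player?.play()
    }
}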