Why can't I iterate through the full array in Metal - swift

I have the following code, which I think allocates 2 arrays of 16kb of memory, fills one with 7 and the other with 4.
I then run the code and find that only the first 511 items get added together. I'm 80% certain it's to do with the size of the thread groups/thread counts. Any Ideas?
func test()
var (device, commandQueue, defaultLibrary, commandBuffer, computeCommandEncoder) = initMetal()
let alignment:Int = 0x4000
var xpointer: UnsafeMutableRawPointer? = nil
var ypointer: UnsafeMutableRawPointer? = nil
let numberOfFloats = 4096 //56
let numberOfBytes: Int = 16384
let retx = posix_memalign(&xpointer, alignment, numberOfBytes)
if retx != noErr {
let err = String(validatingUTF8: strerror(retx)) ?? "unknown error"
fatalError("Unable to allocate aligned memory: \(err).")
let rety = posix_memalign(&ypointer, alignment, numberOfBytes)
if rety != noErr {
let err = String(validatingUTF8: strerror(rety)) ?? "unknown error"
fatalError("Unable to allocate aligned memory: \(err).")
let datax = xpointer!.bindMemory(to: Float.self, capacity: numberOfFloats)
for index in 0..<numberOfFloats {
datax[index] = 7.0
let datay = ypointer!.bindMemory(to: Float.self, capacity: numberOfFloats)
for index in 0..<numberOfFloats {
datay[index] = 4.0
kernelFunction = defaultLibrary.makeFunction(name: "sigmoid")
pipelineState = try device.makeComputePipelineState(function: kernelFunction!)
fatalError("Unable to create pipeline state")
let startTime = CFAbsoluteTimeGetCurrent()
var xvectorBufferNoCopy = device.makeBuffer(bytesNoCopy: xpointer!, length: numberOfBytes, options: [], deallocator: nil)
computeCommandEncoder.setBuffer(xvectorBufferNoCopy, offset: 0, at: 0)
var yvectorBufferNoCopy = device.makeBuffer(bytesNoCopy: ypointer!, length: numberOfBytes, options: [], deallocator: nil)
computeCommandEncoder.setBuffer(yvectorBufferNoCopy, offset: 0, at: 1)
var threadgroupCounts = MTLSize(width:32,height:1,depth:1)
var threadgroups = MTLSize(width:1024, height:1, depth:1)
computeCommandEncoder.dispatchThreadgroups(threadgroups, threadsPerThreadgroup: threadgroupCounts)
let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
print("Time elapsed for \(title): \(timeElapsed) s")
let data = ypointer!.bindMemory(to: Float.self, capacity: numberOfFloats)
for index in 0..<numberOfFloats {
print("\(index) - \(data[index])")
kernel void addTest(const device float *inVector [[ buffer(0) ]],
device float *outVector [[ buffer(1) ]],
uint id [[ thread_position_in_grid ]]) {
outVector[id] = inVector[id] + outVector[id];


Please help me figure out why this Metal code is slower than CPU code?

I'm benchmarking GPU computing using Metal running on an Apple M1 Max's 32-core GPU. The test task is to add an array of 1 billion numbers to another array of the same size. For that, I'm using Xcode 13.1 to run the project in the Release build configuration (optimized for performance) and with "Debug executable" turned off.
I've got two files:
#include <metal_stdlib>
using namespace metal;
kernel void addition_compute_function(constant float *arr1 [[ buffer(0) ]],
constant float *arr2 [[ buffer(1) ]],
device float *resultArray [[ buffer(2) ]],
uint index [[ thread_position_in_grid ]]) {
resultArray[index] = arr1[index] + arr2[index];
import MetalKit
func main() {
let count = 1_000_000_000
let array1 = getArray(count: count)
let array2 = Array(array1.reversed())
cpuWay(count: count, arr1: array1, arr2: array2)
gpuWay(count: count, arr1: array1, arr2: array2)
func cpuWay(count: Int, arr1: [Float], arr2: [Float]) {
var result: [Float] = .init(repeating: 0.0, count: count)
print("CPU way:")
let startTime = CFAbsoluteTimeGetCurrent()
for i in 0..<count {
result[i] = arr1[i] + arr2[i]
let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
print("Time elapsed \(String(format: "%.05f", timeElapsed)) seconds")
func gpuWay(count: Int, arr1: [Float], arr2: [Float]) {
guard let device = MTLCreateSystemDefaultDevice() else { fatalError() }
guard let library = device.makeDefaultLibrary() else { fatalError() }
guard let function = library.makeFunction(name: "addition_compute_function") else { fatalError() }
let length = count*MemoryLayout<Float>.size
let arr1Buff = device.makeBuffer(bytes: arr1, length: length, options: .storageModeShared)
let arr2Buff = device.makeBuffer(bytes: arr2, length: length, options: .storageModeShared)
let resultBuff = device.makeBuffer(length: length, options: .storageModeShared)
guard let commandQueue = device.makeCommandQueue() else { fatalError() }
guard let commandBuffer = commandQueue.makeCommandBuffer() else { fatalError() }
guard let commandEncoder = commandBuffer.makeComputeCommandEncoder() else { fatalError() }
let additionComputePipelineState = try! device.makeComputePipelineState(function: function)
commandEncoder.setBuffer(arr1Buff, offset: 0, index: 0)
commandEncoder.setBuffer(arr2Buff, offset: 0, index: 1)
commandEncoder.setBuffer(resultBuff, offset: 0, index: 2)
let threadsPerGrid = MTLSize(width: count, height: 1, depth: 1)
let maxThreadsPerThreadgroup = additionComputePipelineState.maxTotalThreadsPerThreadgroup // 1024
let threadsPerThreadgroup = MTLSize(width: maxThreadsPerThreadgroup, height: 1, depth: 1)
commandEncoder.dispatchThreads(threadsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
print("GPU way:")
let startTime = CFAbsoluteTimeGetCurrent()
let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
print("Time elapsed \(String(format: "%.05f", timeElapsed)) seconds")
func getArray(count: Int) -> [Float] {
var result: [Float] = .init(repeating: 0.0, count: count)
for i in 0..<count { result[i] = Float(i) }
return result
And the results are:
CPU way:
Time elapsed 0.35217 seconds
GPU way:
Time elapsed 2.10124 seconds
Why is that? Am I using all the GPU cores?
Please note
This question is about Metal, not Matlab. It's not a duplicate of
Why does CPU run faster than GPU in this code?
In that question the OP is testing just 10 elements, this code is testing 1 billion elements.

How can mp3 data in memory be loaded into an AVAudioPCMBuffer in Swift?

I have a class method to read an mp3 file into an AVAudioPCMBuffer as follows:
private(set) var fullAudio: AVAudioPCMBuffer?
func initAudio(audioFileURL: URL) -> Bool {
var status = true
do {
let audioFile = try AVAudioFile(forReading: audioFileURL)
let audioFormat = audioFile.processingFormat
let audioFrameLength = UInt32(audioFile.length)
fullAudio = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameLength)
if let fullAudio = fullAudio {
try audioFile.read(into: fullAudio)
// processing of full audio
} catch {
status = false
return status
However, I now need to be able to read the same mp3 info from memory (rather than a file) into the AVAudioPCMBuffer without using the file system, where the info is held in the Data type, for example using a function declaration of the form
func initAudio(audioFileData: Data) -> Bool {
// some code setting up fullAudio
How can this be done? I've looked to see whether there is a route from Data holding mp3 info to AVAudioPCMBuffer (e.g. via AVAudioBuffer or AVAudioCompressedBuffer), but haven't seen a way forward.
I went down the rabbit hole on this one. Here is what probably amounts to a Rube Goldberg-esque solution:
A lot of the pain comes from using C from Swift.
func data_AudioFile_ReadProc(_ inClientData: UnsafeMutableRawPointer, _ inPosition: Int64, _ requestCount: UInt32, _ buffer: UnsafeMutableRawPointer, _ actualCount: UnsafeMutablePointer<UInt32>) -> OSStatus {
let data = inClientData.assumingMemoryBound(to: Data.self).pointee
let bufferPointer = UnsafeMutableRawBufferPointer(start: buffer, count: Int(requestCount))
let copied = data.copyBytes(to: bufferPointer, from: Int(inPosition) ..< Int(inPosition) + Int(requestCount))
actualCount.pointee = UInt32(copied)
return noErr
func data_AudioFile_GetSizeProc(_ inClientData: UnsafeMutableRawPointer) -> Int64 {
let data = inClientData.assumingMemoryBound(to: Data.self).pointee
return Int64(data.count)
extension Data {
func convertedTo(_ format: AVAudioFormat) -> AVAudioPCMBuffer? {
var data = self
var af: AudioFileID? = nil
var status = AudioFileOpenWithCallbacks(&data, data_AudioFile_ReadProc, nil, data_AudioFile_GetSizeProc(_:), nil, 0, &af)
guard status == noErr, af != nil else {
return nil
defer {
var eaf: ExtAudioFileRef? = nil
status = ExtAudioFileWrapAudioFileID(af!, false, &eaf)
guard status == noErr, eaf != nil else {
return nil
defer {
var clientFormat = format.streamDescription.pointee
status = ExtAudioFileSetProperty(eaf!, kExtAudioFileProperty_ClientDataFormat, UInt32(MemoryLayout.size(ofValue: clientFormat)), &clientFormat)
guard status == noErr else {
return nil
if let channelLayout = format.channelLayout {
var clientChannelLayout = channelLayout.layout.pointee
status = ExtAudioFileSetProperty(eaf!, kExtAudioFileProperty_ClientChannelLayout, UInt32(MemoryLayout.size(ofValue: clientChannelLayout)), &clientChannelLayout)
guard status == noErr else {
return nil
var frameLength: Int64 = 0
var propertySize: UInt32 = UInt32(MemoryLayout.size(ofValue: frameLength))
status = ExtAudioFileGetProperty(eaf!, kExtAudioFileProperty_FileLengthFrames, &propertySize, &frameLength)
guard status == noErr else {
return nil
guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(frameLength)) else {
return nil
let bufferSizeFrames = 512
let bufferSizeBytes = Int(format.streamDescription.pointee.mBytesPerFrame) * bufferSizeFrames
let numBuffers = format.isInterleaved ? 1 : Int(format.channelCount)
let numInterleavedChannels = format.isInterleaved ? Int(format.channelCount) : 1
let audioBufferList = AudioBufferList.allocate(maximumBuffers: numBuffers)
for i in 0 ..< numBuffers {
audioBufferList[i] = AudioBuffer(mNumberChannels: UInt32(numInterleavedChannels), mDataByteSize: UInt32(bufferSizeBytes), mData: malloc(bufferSizeBytes))
defer {
for buffer in audioBufferList {
while true {
var frameCount: UInt32 = UInt32(bufferSizeFrames)
status = ExtAudioFileRead(eaf!, &frameCount, audioBufferList.unsafeMutablePointer)
guard status == noErr else {
return nil
if frameCount == 0 {
let src = audioBufferList
let dst = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList)
if src.count != dst.count {
return nil
for i in 0 ..< src.count {
let srcBuf = src[i]
let dstBuf = dst[i]
memcpy(dstBuf.mData?.advanced(by: Int(dstBuf.mDataByteSize)), srcBuf.mData, Int(srcBuf.mDataByteSize))
pcmBuffer.frameLength += frameCount
return pcmBuffer
A more robust solution would probably read the sample rate and channel count and give the option to preserve them.
Tested using:
let url = URL(fileURLWithPath: "/tmp/test.mp3")
let data = try! Data(contentsOf: url)
let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: false)!
if let d = data.convertedTo(format) {
let avf = try! AVAudioFile(forWriting: URL(fileURLWithPath: "/tmp/foo.wav"), settings: format.settings, commonFormat: format.commonFormat, interleaved: format.isInterleaved)
try! avf.write(from: d)

Using AudioToolbox instead of AVFoundation in SFSpeechRecognizer

I have to use AudioToolbox instead AVAudioSession for providing stream to SFSpeechRecognizer. I know that I should use AudioQueue, so I made an audio recording export to CMSampleBuffer to read it with recognizer. And while debugging I see that the buffer is added to SFSpeechAudioBufferRecognitionRequest, but the code in the task closure doesn't execute: neither result, nor error.
What's wrong with the code?
struct RecordState {
var dataFormat = AudioStreamBasicDescription()
var queue: AudioQueueRef?
var buffers: [AudioQueueBufferRef] = []
var audioFile: AudioFileID?
var currentPacket: Int64 = 0
var recording = false
func сallback(_ inUserData: UnsafeMutableRawPointer?,
_ inAQ: AudioQueueRef,
_ inBuffer: AudioQueueBufferRef,
_ inStartTime: UnsafePointer<AudioTimeStamp>,
_ inNumberPacketDescriptions: UInt32,
_ inPacketDescs: UnsafePointer<AudioStreamPacketDescription>?) {
let recordState = inUserData?.assumingMemoryBound(to: RecordState.self)
if let queue = recordState?.pointee.queue {
AudioQueueEnqueueBuffer(queue, inBuffer, 0, nil)
let rec = AudioRecorder.sharedInstance
rec.transformBuffer(pBuffer: inBuffer, pLength: inBuffer.pointee.mAudioDataByteSize)
class AudioRecorder: NSObject, ObservableObject, SFSpeechRecognizerDelegate {
let format = AudioStreamBasicDescription(mSampleRate: Float64(16000.0), mFormatID: kAudioFormatLinearPCM, mFormatFlags: kAudioFormatFlagsNativeFloatPacked, mBytesPerPacket: UInt32(MemoryLayout<Float32>.size), mFramesPerPacket: 1, mBytesPerFrame: UInt32(MemoryLayout<Float32>.size), mChannelsPerFrame: 1, mBitsPerChannel: UInt32(MemoryLayout<Float32>.size * 8), mReserved: 0)
var recordState = RecordState()
var startTime = CFAbsoluteTimeGetCurrent()
static var sharedInstance = AudioRecorder()
private var speechRecognizer = SFSpeechRecognizer()!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var engineEnabled = false
private var lastText = [SFTranscriptionSegment]()
override init() {
OperationQueue.main.addOperation {
SFSpeechRecognizer.requestAuthorization { authStatus in
switch authStatus {
case .authorized:
self.engineEnabled = true
self.engineEnabled = false
self.speechRecognizer.delegate = self
func startRecording() {
recordState.dataFormat = format
var queue: AudioQueueRef?
if AudioQueueNewInput(&recordState.dataFormat, сallback, &recordState, CFRunLoopGetCurrent(), CFRunLoopMode.commonModes.rawValue, 0, &queue) == noErr {
recordState.queue = queue
} else {
for _ in 0..<NUM_BUFFERS {
var buffer: AudioQueueBufferRef?
if AudioQueueAllocateBuffer(queue!, 1024, &buffer) == noErr {
AudioQueueEnqueueBuffer(queue!, buffer!, 0, nil)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
recognitionRequest.shouldReportPartialResults = true
// Keep speech recognition data on device
if #available(iOS 13, *) {
recognitionRequest.requiresOnDeviceRecognition = true
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
isFinal = result.isFinal
if error != nil || isFinal {
// Stop recognizing speech if there is a problem.
self.recognitionRequest = nil
self.recognitionTask = nil
recordState.recording = true
if AudioQueueStart(recordState.queue!, nil) != noErr {
fatalError("Something is wrong")
self.startTime = CFAbsoluteTimeGetCurrent()
func stopRecording() {
recordState.recording = false
AudioQueueStop(recordState.queue!, true)
for i in 0..<NUM_BUFFERS {
if let buffers = recordState.buffers[i] as? AudioQueueBufferRef {
AudioQueueFreeBuffer(recordState.queue!, buffers)
AudioQueueDispose(recordState.queue!, true)
if let file = recordState.audioFile {
func transformBuffer(pBuffer: AudioQueueBufferRef, pLength: UInt32) {
var blockBuffer: CMBlockBuffer?
CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault, memoryBlock: pBuffer, blockLength: Int(pLength), blockAllocator: kCFAllocatorNull, customBlockSource: nil, offsetToData: 0, dataLength: Int(pLength), flags: kCMBlockBufferAssureMemoryNowFlag, blockBufferOut: &blockBuffer)
let timeFormat = format.mSampleRate
let currentTime = CFAbsoluteTimeGetCurrent()
let elapsedTime: CFTimeInterval = currentTime - self.startTime
let timeStamp = CMTimeMake(value: Int64(elapsedTime * timeFormat), timescale: Int32(timeFormat))
let nSamples = Int(pLength / format.mBytesPerFrame)
do {
let formatDescription = try CMAudioFormatDescription(audioStreamBasicDescription: format)
var sampleBuffer: CMSampleBuffer?
CMAudioSampleBufferCreateWithPacketDescriptions(allocator: kCFAllocatorDefault, dataBuffer: blockBuffer, dataReady: true, makeDataReadyCallback: nil, refcon: nil, formatDescription: formatDescription, sampleCount: nSamples, presentationTimeStamp: timeStamp, packetDescriptions: nil, sampleBufferOut: &sampleBuffer)
if let sBuffer = sampleBuffer {
} catch {
UPD: I modified the code so it could be more descriptive
Finally, I've found the answer. Here's the code for the conversion of AudioQueueBufferRef into AVAudioPCMBuffer:
func queueBufferToAudioBuffer(_ buffer: AudioQueueBufferRef) -> AVAudioPCMBuffer? {
guard let audioFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: format.mSampleRate,
channels: format.mChannelsPerFrame,
interleaved: true)
else { return nil }
let frameLength = buffer.pointee.mAudioDataBytesCapacity / audioFormat.streamDescription.pointee.mBytesPerFrame
guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameLength) else { return nil }
audioBuffer.frameLength = frameLength
let dstLeft = audioBuffer.floatChannelData![0]
let src = buffer.pointee.mAudioData.bindMemory(to: Float.self, capacity: Int(frameLength))
dstLeft.initialize(from: src, count: Int(frameLength))
return audioBuffer
I fixed this by setting up the AVAudioSession before AudioQueueStart.
try AVAudioSession.sharedInstance().setCategory(.record, mode:.default)
try AVAudioSession.sharedInstance().setActive(true)
} catch{

Accessing Pixels Outside of the CVPixelBuffer that has been extended with Padding

I am trying to extend a CVPixelBuffer so that accesses to memory that is outside of the buffer does not cause a EXC_BAD_ACCESS error by reinitializing the CVPixelBuffer with padding. However, it doesn't seem to work. Any tips on what I am doing wrong would be greatly appreciated.
let paddingLeft = abs(min(cropX, 0))
let paddingRight = max((cropX + cropWidth) - (srcWidth - 1), 0)
let paddingBottom = max((cropY + cropHeight) - (srcHeight - 1), 0)
let paddingTop = abs(min(cropY, 0))
let attr = [kCVPixelBufferExtendedPixelsLeftKey: paddingLeft*40 + 1 as CFNumber,
kCVPixelBufferExtendedPixelsTopKey: paddingTop*40 + 1 as CFNumber,
kCVPixelBufferExtendedPixelsRightKey: paddingRight*40 + 1 as CFNumber,
kCVPixelBufferExtendedPixelsBottomKey: paddingBottom*40 + 1 as CFNumber]
guard kCVReturnSuccess == CVPixelBufferCreateWithBytes(kCFAllocatorDefault, srcWidth, srcHeight, pixelFormat, srcData, srcBytesPerRow, nil, nil, attr as CFDictionary, &paddedSrcPixelBuffer) else {
print("failed to allocate a new padded pixel buffer")
return nil
With an extended CVPixelBuffer, accessing data outside of the CVPixelBuffer (such as when the x,y is negative or bigger than the width/height of the buffer) should be defined behavior based on my understanding. Yet the following piece of code crashes on the last line inside of the VImageScale_ARGB8888 with EXC_BAD_ACCESS Code 1.
This presumably means that the data being accessed is being unmapped.
guard let paddedSrcData = CVPixelBufferGetBaseAddress(paddedSrcPixelBuffer) else {
print("Error: could not get padded pixel buffer base address")
return nil
srcBuffer = vImage_Buffer(data: paddedSrcData.advanced(by: offset),
height: vImagePixelCount(cropHeight),
width: vImagePixelCount(cropWidth),
rowBytes: srcBytesPerRow)
let destBytesPerRow = scaleWidth*4
let destData = malloc(scaleHeight*destBytesPerRow)
var destBuffer = vImage_Buffer(data: destData,
height: vImagePixelCount(scaleHeight),
width: vImagePixelCount(scaleWidth),
rowBytes: destBytesPerRow)
let vImageFlags: vImage_Flags = vImage_Flags(kvImageEdgeExtend)
let error = vImageScale_ARGB8888(&srcBuffer, &destBuffer, nil, vImageFlags) // crashes here due to EXC_BAD_ACCESS Code: 1
Many Thanks!
Here is a modified version of a resize function that will take create a padded buffer by copying contiguous sections of memory into a newly allocated buffer with the correct shape.
public func resizePixelBuffer(_ srcPixelBuffer: CVPixelBuffer,
cropX: Int,
cropY: Int,
cropWidth: Int,
cropHeight: Int,
scaleWidth: Int,
scaleHeight: Int) -> CVPixelBuffer? {
let flags = CVPixelBufferLockFlags(rawValue: 0)
let pixelFormat = CVPixelBufferGetPixelFormatType(srcPixelBuffer)
guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(srcPixelBuffer, flags) else {
return nil
defer { CVPixelBufferUnlockBaseAddress(srcPixelBuffer, flags) }
guard let srcData = CVPixelBufferGetBaseAddress(srcPixelBuffer) else {
print("Error: could not get pixel buffer base address")
return nil
let srcHeight = CVPixelBufferGetHeight(srcPixelBuffer)
let srcWidth = CVPixelBufferGetWidth(srcPixelBuffer)
let srcBytesPerRow = CVPixelBufferGetBytesPerRow(srcPixelBuffer)
let offset = cropY*srcBytesPerRow + cropX*4
var srcBuffer: vImage_Buffer!
var paddedSrcPixelBuffer: CVPixelBuffer!
if (cropX < 0 || cropY < 0 || cropX + cropWidth > srcWidth || cropY + cropHeight > srcHeight) {
let paddingLeft = abs(min(cropX, 0))
let paddingRight = max((cropX + cropWidth) - (srcWidth - 1), 0)
let paddingBottom = max((cropY + cropHeight) - (srcHeight - 1), 0)
let paddingTop = abs(min(cropY, 0))
let paddedHeight = paddingTop + srcHeight + paddingBottom
let paddedWidth = paddingLeft + srcWidth + paddingRight
guard kCVReturnSuccess == CVPixelBufferCreate(kCFAllocatorDefault, paddedWidth, paddedHeight, pixelFormat, nil, &paddedSrcPixelBuffer) else {
print("failed to allocate a new padded pixel buffer")
return nil
guard kCVReturnSuccess == CVPixelBufferLockBaseAddress(paddedSrcPixelBuffer, flags) else {
return nil
guard let paddedSrcData = CVPixelBufferGetBaseAddress(paddedSrcPixelBuffer) else {
print("Error: could not get padded pixel buffer base address")
return nil
let paddedBytesPerRow = CVPixelBufferGetBytesPerRow(paddedSrcPixelBuffer)
for yIndex in paddingTop..<srcHeight+paddingTop {
let dstRowStart = paddedSrcData.advanced(by: yIndex*paddedBytesPerRow).advanced(by: paddingLeft*4)
let srcRowStart = srcData.advanced(by: (yIndex - paddingTop)*srcBytesPerRow)
dstRowStart.copyMemory(from: srcRowStart, byteCount: srcBytesPerRow)
let paddedOffset = (cropY + paddingTop)*paddedBytesPerRow + (cropX + paddingLeft)*4
srcBuffer = vImage_Buffer(data: paddedSrcData.advanced(by: paddedOffset),
height: vImagePixelCount(cropHeight),
width: vImagePixelCount(cropWidth),
rowBytes: paddedBytesPerRow)
} else {
srcBuffer = vImage_Buffer(data: srcData.advanced(by: offset),
height: vImagePixelCount(cropHeight),
width: vImagePixelCount(cropWidth),
rowBytes: srcBytesPerRow)
let destBytesPerRow = scaleWidth*4
guard let destData = malloc(scaleHeight*destBytesPerRow) else {
print("Error: out of memory")
return nil
var destBuffer = vImage_Buffer(data: destData,
height: vImagePixelCount(scaleHeight),
width: vImagePixelCount(scaleWidth),
rowBytes: destBytesPerRow)
let vImageFlags: vImage_Flags = vImage_Flags(kvImageEdgeExtend)
let error = vImageScale_ARGB8888(&srcBuffer, &destBuffer, nil, vImageFlags)
if error != kvImageNoError {
print("Error:", error)
return nil
let releaseCallback: CVPixelBufferReleaseBytesCallback = { _, ptr in
if let ptr = ptr {
free(UnsafeMutableRawPointer(mutating: ptr))
var dstPixelBuffer: CVPixelBuffer?
let status = CVPixelBufferCreateWithBytes(nil, scaleWidth, scaleHeight,
pixelFormat, destData,
destBytesPerRow, releaseCallback,
nil, nil, &dstPixelBuffer)
if status != kCVReturnSuccess {
print("Error: could not create new pixel buffer")
return nil
if paddedSrcPixelBuffer != nil {
CVPixelBufferUnlockBaseAddress(paddedSrcPixelBuffer, flags)
return dstPixelBuffer

AudioQueueStart But no voice

import Foundation
import AudioToolbox
class AudioPlay {
//setting buffer num
static let knumberBuffers = 3
var aqData = AQPlayerState.init()
//A custom structure for a playback audio queue
class AQPlayerState {
var mDataFormat = AudioStreamBasicDescription()
var mQueue:AudioQueueRef?
var mBuffers = [AudioQueueBufferRef?].init(repeating: nil, count: AudioPlay.knumberBuffers)
var mAudioFile:AudioFileID?
var bufferByteSize = UInt32()
var mCurrentPacket:Int64?
var mNumPacketsToRead = UInt32()
var mPacketDescs:UnsafeMutablePointer<AudioStreamPacketDescription>?
var mIsRunning = false
//playbackAudioQueue callback
static let HandleOutputBuffer:AudioQueueOutputCallback = { (aqData1, inAQ, inBuffer) in
var pAqData = (aqData1?.assumingMemoryBound(to: AQPlayerState.self).pointee)!
guard pAqData.mIsRunning || pAqData.mQueue != nil else{
print("audioplay is not running exit callback func")
var numBytesReadFromFile = UInt32()
var numPackets = pAqData.mNumPacketsToRead
AudioFileReadPacketData(pAqData.mAudioFile!, false, &numBytesReadFromFile, pAqData.mPacketDescs, pAqData.mCurrentPacket!, &numPackets, inBuffer.pointee.mAudioData)
if numPackets > 0 {
inBuffer.pointee.mAudioDataByteSize = numBytesReadFromFile
AudioQueueEnqueueBuffer(pAqData.mQueue!, inBuffer, ((pAqData.mPacketDescs != nil) ? numPackets : UInt32(0)), pAqData.mPacketDescs)
pAqData.mCurrentPacket! += Int64(numPackets)
AudioQueueStop(pAqData.mQueue!, false)
pAqData.mIsRunning = false
//call func to set the property
//create new outputqueue
//start th audioqueue
func start() {
let url = Bundle.main.url(forResource: "123", withExtension: "mp3")!
let audioFileURL = url as CFURL
let result = AudioFileOpenURL(audioFileURL, .readPermission, 0, &aqData.mAudioFile)
var dataFormatSize = UInt32(MemoryLayout.size(ofValue: aqData.mDataFormat))
let result1 = AudioFileGetProperty(aqData.mAudioFile!, kAudioFilePropertyDataFormat,&dataFormatSize, &aqData.mDataFormat)
//get file property
var maxPacketSize = UInt32()
var propertySize = UInt32(MemoryLayout.size(ofValue: maxPacketSize))
let result2 = AudioFileGetProperty(aqData.mAudioFile!, kAudioFilePropertyPacketSizeUpperBound, &propertySize, &maxPacketSize)
//calculate and setting buffer size
DeriveBufferSize(ASBDesc: aqData.mDataFormat, maxPacketSize: maxPacketSize, seconds: 0.5, outBufferSize: &aqData.bufferByteSize, outNumPacketsToRead: &aqData.mNumPacketsToRead)
//check the format is VBR or CBR
let isFormatVBR = aqData.mDataFormat.mBytesPerPacket == 0 || aqData.mDataFormat.mFramesPerPacket == 0
if isFormatVBR {
aqData.mPacketDescs = UnsafeMutablePointer<AudioStreamPacketDescription>.allocate(capacity: MemoryLayout.size(ofValue: AudioStreamPacketDescription()))
aqData.mPacketDescs = nil
//create new audio queue
let result4 = AudioQueueNewOutput(&aqData.mDataFormat,AudioPlay.HandleOutputBuffer, &aqData,CFRunLoopGetCurrent(),CFRunLoopMode.commonModes.rawValue, 0, &aqData.mQueue)
//queue start
aqData.mIsRunning = true
//alloc memory buffer
aqData.mCurrentPacket = 0
for i in 0..<AudioPlay.knumberBuffers {
AudioQueueAllocateBuffer(aqData.mQueue!, aqData.bufferByteSize,&aqData.mBuffers[i])
AudioPlay.HandleOutputBuffer(&aqData,aqData.mQueue!, (aqData.mBuffers[i])!)
//start audioqueue
AudioQueueStart(aqData.mQueue!, nil)
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 0.25, false)
}while (aqData.mIsRunning)
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 1, false)
//calculate and setting buffer size
func DeriveBufferSize(ASBDesc:AudioStreamBasicDescription,maxPacketSize:UInt32,seconds:Float64,outBufferSize:UnsafeMutablePointer<UInt32>,outNumPacketsToRead:UnsafeMutablePointer<UInt32>) {
let maxBufferSize:UInt32 = 0x50000
let minBufferSIze:UInt32 = 0x4000
if ASBDesc.mFramesPerPacket != 0 {
let numPacketsForTime = ASBDesc.mSampleRate / Float64(ASBDesc.mFramesPerPacket) * seconds
outBufferSize.pointee = UInt32(numPacketsForTime) * maxPacketSize
outBufferSize.pointee = (maxBufferSize > maxPacketSize) ? maxBufferSize:maxPacketSize
if outBufferSize.pointee > maxBufferSize && outBufferSize.pointee > maxPacketSize {
outBufferSize.pointee = maxBufferSize
if outBufferSize.pointee < minBufferSIze{
outBufferSize.pointee = minBufferSIze
outNumPacketsToRead.pointee = outBufferSize.pointee/maxPacketSize
//dispose the audioqueue
func Dispose() {
AudioQueueDispose(aqData.mQueue!, true)
above code is writed according AudioQueueServiceProgrammingGuide!
create an instence of this class,and call the start() func
compile ok,but no output voice.
I had check the code many times but no advancing
can any one who familiar with audiiqueue help me?
Any help will be appreciated.
replace "AudioFileReadPacketData" with "AudioFileReadPackets" can fix this problem!
but I get a new problem like below sometime! sometimes it works well!
Stream Audio(20535,0x1085ac3c0) malloc: * error for object 0x6080001f6300: Invalid pointer dequeued from free list
* set a breakpoint in malloc_error_break to