Using AudioToolbox instead of AVFoundation in SFSpeechRecognizer - swift

I have to use AudioToolbox instead AVAudioSession for providing stream to SFSpeechRecognizer. I know that I should use AudioQueue, so I made an audio recording export to CMSampleBuffer to read it with recognizer. And while debugging I see that the buffer is added to SFSpeechAudioBufferRecognitionRequest, but the code in the task closure doesn't execute: neither result, nor error.
What's wrong with the code?
let NUM_BUFFERS = 1
struct RecordState {
var dataFormat = AudioStreamBasicDescription()
var queue: AudioQueueRef?
var buffers: [AudioQueueBufferRef] = []
var audioFile: AudioFileID?
var currentPacket: Int64 = 0
var recording = false
}
func сallback(_ inUserData: UnsafeMutableRawPointer?,
_ inAQ: AudioQueueRef,
_ inBuffer: AudioQueueBufferRef,
_ inStartTime: UnsafePointer<AudioTimeStamp>,
_ inNumberPacketDescriptions: UInt32,
_ inPacketDescs: UnsafePointer<AudioStreamPacketDescription>?) {
let recordState = inUserData?.assumingMemoryBound(to: RecordState.self)
if let queue = recordState?.pointee.queue {
AudioQueueEnqueueBuffer(queue, inBuffer, 0, nil)
let rec = AudioRecorder.sharedInstance
rec.transformBuffer(pBuffer: inBuffer, pLength: inBuffer.pointee.mAudioDataByteSize)
}
}
class AudioRecorder: NSObject, ObservableObject, SFSpeechRecognizerDelegate {
let format = AudioStreamBasicDescription(mSampleRate: Float64(16000.0), mFormatID: kAudioFormatLinearPCM, mFormatFlags: kAudioFormatFlagsNativeFloatPacked, mBytesPerPacket: UInt32(MemoryLayout<Float32>.size), mFramesPerPacket: 1, mBytesPerFrame: UInt32(MemoryLayout<Float32>.size), mChannelsPerFrame: 1, mBitsPerChannel: UInt32(MemoryLayout<Float32>.size * 8), mReserved: 0)
var recordState = RecordState()
var startTime = CFAbsoluteTimeGetCurrent()
static var sharedInstance = AudioRecorder()
private var speechRecognizer = SFSpeechRecognizer()!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var engineEnabled = false
private var lastText = [SFTranscriptionSegment]()
override init() {
super.init()
OperationQueue.main.addOperation {
SFSpeechRecognizer.requestAuthorization { authStatus in
switch authStatus {
case .authorized:
self.engineEnabled = true
default:
self.engineEnabled = false
}
}
}
self.speechRecognizer.delegate = self
}
func startRecording() {
recordState.dataFormat = format
var queue: AudioQueueRef?
if AudioQueueNewInput(&recordState.dataFormat, сallback, &recordState, CFRunLoopGetCurrent(), CFRunLoopMode.commonModes.rawValue, 0, &queue) == noErr {
recordState.queue = queue
} else {
return
}
for _ in 0..<NUM_BUFFERS {
var buffer: AudioQueueBufferRef?
if AudioQueueAllocateBuffer(queue!, 1024, &buffer) == noErr {
recordState.buffers.append(buffer!)
}
AudioQueueEnqueueBuffer(queue!, buffer!, 0, nil)
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
recognitionRequest.shouldReportPartialResults = true
// Keep speech recognition data on device
if #available(iOS 13, *) {
recognitionRequest.requiresOnDeviceRecognition = true
}
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
print(result.bestTranscription.formattedString)
isFinal = result.isFinal
}
if error != nil || isFinal {
// Stop recognizing speech if there is a problem.
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
recordState.recording = true
if AudioQueueStart(recordState.queue!, nil) != noErr {
fatalError("Something is wrong")
}
self.startTime = CFAbsoluteTimeGetCurrent()
}
func stopRecording() {
recordState.recording = false
AudioQueueStop(recordState.queue!, true)
for i in 0..<NUM_BUFFERS {
if let buffers = recordState.buffers[i] as? AudioQueueBufferRef {
AudioQueueFreeBuffer(recordState.queue!, buffers)
}
}
AudioQueueDispose(recordState.queue!, true)
if let file = recordState.audioFile {
AudioFileClose(file)
}
}
func transformBuffer(pBuffer: AudioQueueBufferRef, pLength: UInt32) {
var blockBuffer: CMBlockBuffer?
CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault, memoryBlock: pBuffer, blockLength: Int(pLength), blockAllocator: kCFAllocatorNull, customBlockSource: nil, offsetToData: 0, dataLength: Int(pLength), flags: kCMBlockBufferAssureMemoryNowFlag, blockBufferOut: &blockBuffer)
let timeFormat = format.mSampleRate
let currentTime = CFAbsoluteTimeGetCurrent()
let elapsedTime: CFTimeInterval = currentTime - self.startTime
let timeStamp = CMTimeMake(value: Int64(elapsedTime * timeFormat), timescale: Int32(timeFormat))
let nSamples = Int(pLength / format.mBytesPerFrame)
do {
let formatDescription = try CMAudioFormatDescription(audioStreamBasicDescription: format)
var sampleBuffer: CMSampleBuffer?
CMAudioSampleBufferCreateWithPacketDescriptions(allocator: kCFAllocatorDefault, dataBuffer: blockBuffer, dataReady: true, makeDataReadyCallback: nil, refcon: nil, formatDescription: formatDescription, sampleCount: nSamples, presentationTimeStamp: timeStamp, packetDescriptions: nil, sampleBufferOut: &sampleBuffer)
if let sBuffer = sampleBuffer {
self.recognitionRequest?.appendAudioSampleBuffer(sBuffer)
}
} catch {
fatalError(error.localizedDescription)
}
}
}
UPD: I modified the code so it could be more descriptive

Finally, I've found the answer. Here's the code for the conversion of AudioQueueBufferRef into AVAudioPCMBuffer:
func queueBufferToAudioBuffer(_ buffer: AudioQueueBufferRef) -> AVAudioPCMBuffer? {
guard let audioFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: format.mSampleRate,
channels: format.mChannelsPerFrame,
interleaved: true)
else { return nil }
let frameLength = buffer.pointee.mAudioDataBytesCapacity / audioFormat.streamDescription.pointee.mBytesPerFrame
guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameLength) else { return nil }
audioBuffer.frameLength = frameLength
let dstLeft = audioBuffer.floatChannelData![0]
let src = buffer.pointee.mAudioData.bindMemory(to: Float.self, capacity: Int(frameLength))
dstLeft.initialize(from: src, count: Int(frameLength))
return audioBuffer
}

I fixed this by setting up the AVAudioSession before AudioQueueStart.
do{
try AVAudioSession.sharedInstance().setCategory(.record, mode:.default)
try AVAudioSession.sharedInstance().setActive(true)
} catch{
print(error)
}

Related

Memory leak when using AVCaptureSession to AVAssetWriter using AVAssetWriterDelegate for HLS

We have found AVAssetWriter to leak memory when using the needed delegate AVAssetWriterDelegate to create HLS fMP4 video. Even before ever using the given segment data to process and store.
When releasing the memory by hand (it feels like this is wrong) the memory leak seems to disappear.
Even in a minimal situation the memory increases rapidly.
import Cocoa
import AVFoundation
class ViewController: NSViewController {
override func viewDidLoad() {
super.viewDidLoad()
AVCaptureDevice.requestAccess(for: .video, completionHandler: {
_ in
})
}
private var fileWriter: AVAssetWriter!
private var videoInput: AVAssetWriterInput!
private var bufferAdaptor: AVAssetWriterInputPixelBufferAdaptor!
private var captureSession: AVCaptureSession!
internal let recordingQueue = DispatchQueue(label: "RecordingQueue", qos: .userInitiated)
internal let writerQueue = DispatchQueue(label: "WriterQueue", qos: .userInitiated)
#IBAction func startCapture(_ sender: NSButton) {
self.writerQueue.async {
let device = AVCaptureDevice.default(for: .video)!
try! device.lockForConfiguration()
device.activeFormat = device.formats.last!
device.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: 25)
device.activeVideoMinFrameDuration = CMTime(value: 1, timescale: 25)
device.unlockForConfiguration()
self.fileWriter = AVAssetWriter(contentType: .mpeg4Movie)
self.fileWriter.preferredOutputSegmentInterval = CMTime(seconds: 0.2, preferredTimescale: 60000)
self.fileWriter.outputFileTypeProfile = .mpeg4AppleHLS
self.fileWriter.initialSegmentStartTime = .zero
let videoOutputSettings: [String: Any] = [
AVVideoWidthKey: 1920,
AVVideoHeightKey: 1080,
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoCompressionPropertiesKey: [
AVVideoProfileLevelKey: AVVideoProfileLevelH264HighAutoLevel,
AVVideoAverageBitRateKey: 6000 * 1024
]
]
self.videoInput = AVAssetWriterInput(mediaType: AVMediaType.video, outputSettings: videoOutputSettings)
self.fileWriter.movieTimeScale = CMTimeScale(exactly: 25)!
self.videoInput.mediaTimeScale = CMTimeScale(exactly: 25)!
self.videoInput.expectsMediaDataInRealTime = true
self.videoInput.performsMultiPassEncodingIfSupported = false
let sourcePixelBufferAttributes:[String:Any] = [
kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32ARGB,
kCVPixelBufferMetalCompatibilityKey as String: true,
]
self.bufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: self.videoInput, sourcePixelBufferAttributes: sourcePixelBufferAttributes)
if self.fileWriter.canAdd(self.videoInput) {
self.fileWriter.add(self.videoInput)
} else {
return
}
self.fileWriter.delegate = self
self.captureSession = AVCaptureSession()
self.captureSession?.beginConfiguration()
let videoInput = try! AVCaptureDeviceInput(device: device)
if self.captureSession?.canAddInput(videoInput) ?? false {
self.captureSession?.addInput(videoInput)
} else {
return
}
self.captureSession?.sessionPreset = AVCaptureSession.Preset.high
let videoDataOutput = AVCaptureVideoDataOutput()
videoDataOutput.setSampleBufferDelegate(self, queue: self.recordingQueue)
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.videoSettings = [
kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA,
kCVPixelBufferMetalCompatibilityKey as String: true
]
if self.captureSession?.canAddOutput(videoDataOutput) ?? false {
self.captureSession?.addOutput(videoDataOutput)
} else {
return
}
self.captureSession?.commitConfiguration()
try! device.lockForConfiguration()
self.captureSession?.startRunning()
device.unlockForConfiguration()
self.fileWriter.startWriting()
self.fileWriter.startSession(atSourceTime: CMTime.zero)
}
}
func write(sample: CMSampleBuffer) {
if self.videoInput.isReadyForMoreMediaData {
self.videoInput.append(sample)
}
}
}
extension ViewController: AVAssetWriterDelegate {
func assetWriter(_ writer: AVAssetWriter, didOutputSegmentData segmentData: Data, segmentType: AVAssetSegmentType, segmentReport: AVAssetSegmentReport?) {
print(segmentData.count)
// let _ = segmentData.withUnsafeBytes {
// raw in
// raw.baseAddress?.deallocate()
// }
}
}
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
self.write(sample: sampleBuffer)
}
}
Run this small code in a new project and you will see the memory grow. Uncomment the lines in the delegate and it's as expected.
What are we missing? Or did we discover a bug? (Already sent in to Apple).
Any ideas are welcome to get this leak closed...

How can mp3 data in memory be loaded into an AVAudioPCMBuffer in Swift?

I have a class method to read an mp3 file into an AVAudioPCMBuffer as follows:
private(set) var fullAudio: AVAudioPCMBuffer?
func initAudio(audioFileURL: URL) -> Bool {
var status = true
do {
let audioFile = try AVAudioFile(forReading: audioFileURL)
let audioFormat = audioFile.processingFormat
let audioFrameLength = UInt32(audioFile.length)
fullAudio = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameLength)
if let fullAudio = fullAudio {
try audioFile.read(into: fullAudio)
// processing of full audio
}
} catch {
status = false
}
return status
}
However, I now need to be able to read the same mp3 info from memory (rather than a file) into the AVAudioPCMBuffer without using the file system, where the info is held in the Data type, for example using a function declaration of the form
func initAudio(audioFileData: Data) -> Bool {
// some code setting up fullAudio
}
How can this be done? I've looked to see whether there is a route from Data holding mp3 info to AVAudioPCMBuffer (e.g. via AVAudioBuffer or AVAudioCompressedBuffer), but haven't seen a way forward.
I went down the rabbit hole on this one. Here is what probably amounts to a Rube Goldberg-esque solution:
A lot of the pain comes from using C from Swift.
func data_AudioFile_ReadProc(_ inClientData: UnsafeMutableRawPointer, _ inPosition: Int64, _ requestCount: UInt32, _ buffer: UnsafeMutableRawPointer, _ actualCount: UnsafeMutablePointer<UInt32>) -> OSStatus {
let data = inClientData.assumingMemoryBound(to: Data.self).pointee
let bufferPointer = UnsafeMutableRawBufferPointer(start: buffer, count: Int(requestCount))
let copied = data.copyBytes(to: bufferPointer, from: Int(inPosition) ..< Int(inPosition) + Int(requestCount))
actualCount.pointee = UInt32(copied)
return noErr
}
func data_AudioFile_GetSizeProc(_ inClientData: UnsafeMutableRawPointer) -> Int64 {
let data = inClientData.assumingMemoryBound(to: Data.self).pointee
return Int64(data.count)
}
extension Data {
func convertedTo(_ format: AVAudioFormat) -> AVAudioPCMBuffer? {
var data = self
var af: AudioFileID? = nil
var status = AudioFileOpenWithCallbacks(&data, data_AudioFile_ReadProc, nil, data_AudioFile_GetSizeProc(_:), nil, 0, &af)
guard status == noErr, af != nil else {
return nil
}
defer {
AudioFileClose(af!)
}
var eaf: ExtAudioFileRef? = nil
status = ExtAudioFileWrapAudioFileID(af!, false, &eaf)
guard status == noErr, eaf != nil else {
return nil
}
defer {
ExtAudioFileDispose(eaf!)
}
var clientFormat = format.streamDescription.pointee
status = ExtAudioFileSetProperty(eaf!, kExtAudioFileProperty_ClientDataFormat, UInt32(MemoryLayout.size(ofValue: clientFormat)), &clientFormat)
guard status == noErr else {
return nil
}
if let channelLayout = format.channelLayout {
var clientChannelLayout = channelLayout.layout.pointee
status = ExtAudioFileSetProperty(eaf!, kExtAudioFileProperty_ClientChannelLayout, UInt32(MemoryLayout.size(ofValue: clientChannelLayout)), &clientChannelLayout)
guard status == noErr else {
return nil
}
}
var frameLength: Int64 = 0
var propertySize: UInt32 = UInt32(MemoryLayout.size(ofValue: frameLength))
status = ExtAudioFileGetProperty(eaf!, kExtAudioFileProperty_FileLengthFrames, &propertySize, &frameLength)
guard status == noErr else {
return nil
}
guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(frameLength)) else {
return nil
}
let bufferSizeFrames = 512
let bufferSizeBytes = Int(format.streamDescription.pointee.mBytesPerFrame) * bufferSizeFrames
let numBuffers = format.isInterleaved ? 1 : Int(format.channelCount)
let numInterleavedChannels = format.isInterleaved ? Int(format.channelCount) : 1
let audioBufferList = AudioBufferList.allocate(maximumBuffers: numBuffers)
for i in 0 ..< numBuffers {
audioBufferList[i] = AudioBuffer(mNumberChannels: UInt32(numInterleavedChannels), mDataByteSize: UInt32(bufferSizeBytes), mData: malloc(bufferSizeBytes))
}
defer {
for buffer in audioBufferList {
free(buffer.mData)
}
free(audioBufferList.unsafeMutablePointer)
}
while true {
var frameCount: UInt32 = UInt32(bufferSizeFrames)
status = ExtAudioFileRead(eaf!, &frameCount, audioBufferList.unsafeMutablePointer)
guard status == noErr else {
return nil
}
if frameCount == 0 {
break
}
let src = audioBufferList
let dst = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList)
if src.count != dst.count {
return nil
}
for i in 0 ..< src.count {
let srcBuf = src[i]
let dstBuf = dst[i]
memcpy(dstBuf.mData?.advanced(by: Int(dstBuf.mDataByteSize)), srcBuf.mData, Int(srcBuf.mDataByteSize))
}
pcmBuffer.frameLength += frameCount
}
return pcmBuffer
}
}
A more robust solution would probably read the sample rate and channel count and give the option to preserve them.
Tested using:
let url = URL(fileURLWithPath: "/tmp/test.mp3")
let data = try! Data(contentsOf: url)
let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: false)!
if let d = data.convertedTo(format) {
let avf = try! AVAudioFile(forWriting: URL(fileURLWithPath: "/tmp/foo.wav"), settings: format.settings, commonFormat: format.commonFormat, interleaved: format.isInterleaved)
try! avf.write(from: d)
}

How can I record AVDepthData video and save in the gallery?

I am developing an application to record RGB-D sequences with the iPhone by using the DualRearCamera or the TrueDepthCamera. I can capture and visualize the RGB frame and depth frames and I developed a version where I can compress this data and save in the internal files of the iPhone. Nevertheless, my idea is to save both sequences (RGB and depth map sequences) in the gallery, but I am having problems to use AVAssetWritter and create a depth map video.
I am using the iPhone X, Xcode 10.2.1 and swift 5
import UIKit
import AVFoundation
import AssetsLibrary
var noMoreSpace = false
class ViewController: UIViewController{
#IBOutlet weak var previewView: UIImageView!
#IBOutlet weak var timeLabel: UILabel!
#IBOutlet weak var previewModeControl: UISegmentedControl!
let session = AVCaptureSession()
let dataOutputQueue = DispatchQueue(label: "video data queue")
let videoOutput = AVCaptureVideoDataOutput()
let movieOutput = AVCaptureMovieFileOutput()
let depthOutput = AVCaptureDepthDataOutput()
let depthCapture = DepthCapture()
var previewLayer = AVCaptureVideoPreviewLayer()
var inputDevice: AVCaptureDeviceInput!
let videoDeviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInDualCamera, .builtInTrueDepthCamera], mediaType: .video, position: .unspecified)
var Timestamp: String {
let currentDate = NSDate()
let dateFormatter = DateFormatter()
dateFormatter.dateFormat = "ddMM_HHmmss"
return "\(dateFormatter.string(from: currentDate as Date))"
}
var isRecording = false
var time = 0
var timer = Timer()
enum PreviewMode: Int {
case original
case depth
}
var previewMode = PreviewMode.original
var depthMap: CIImage?
var scale: CGFloat = 0.0
//let sessionQueue = DispatchQueue(label: "session queue")
override func viewDidLoad() {
super.viewDidLoad()
timeLabel.isHidden = true //TODO: Disable the rest of the UI
previewMode = PreviewMode(rawValue: previewModeControl.selectedSegmentIndex) ?? .original
configureCaptureSession()
session.startRunning()
}
func configureCaptureSession() {
session.beginConfiguration()
let camera = AVCaptureDevice.default(.builtInTrueDepthCamera, for: .video, position: .unspecified)!
do {
let cameraInput = try AVCaptureDeviceInput(device: camera)
if session.canAddInput(cameraInput){
session.sessionPreset = .vga640x480
session.addInput(cameraInput)
self.inputDevice = cameraInput
}
if session.canAddOutput(videoOutput){
videoOutput.setSampleBufferDelegate(self, queue: dataOutputQueue)
videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA]
session.addOutput(videoOutput)
let videoConnection = videoOutput.connection(with: .video)
videoConnection?.videoOrientation = .portrait
//previewLayer = AVCaptureVideoPreviewLayer(session: session)
//previewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
//previewLayer.connection?.videoOrientation = AVCaptureVideoOrientation.portrait
//previewView.layer.addSublayer(previewLayer)
//previewLayer.position = CGPoint(x: self.previewView.frame.width / 2, y: self.previewView.frame.height / 2)
//previewLayer.bounds = previewView.frame
}
//Add Depth output to the session
if session.canAddOutput(depthOutput){
session.addOutput(depthOutput)
depthOutput.setDelegate(self, callbackQueue: dataOutputQueue)
depthOutput.isFilteringEnabled = true
let depthConnection = depthOutput.connection(with: .depthData)
depthConnection?.videoOrientation = .portrait
}
/*if session.canAddOutput(movieOutput){
session.addOutput(movieOutput)
}*/
} catch {
print("Error")
}
let outputRect = CGRect(x: 0, y: 0, width: 1, height: 1)
let videoRect = videoOutput.outputRectConverted(fromMetadataOutputRect: outputRect)
let depthRect = depthOutput.outputRectConverted(fromMetadataOutputRect: outputRect)
// Calculate the scaling factor between videoRect and depthRect
scale = max(videoRect.width, videoRect.height) / max(depthRect.width, depthRect.height)
// Change the AVCaptureDevice configuration, so you need to lock it
do{
try camera.lockForConfiguration()
// Set the AVCaptureDevice‘s minimum frame duration (which is the inverse of the maximum frame rate) to be equal to the supported frame rate of the depth data
if let frameDuration = camera.activeDepthDataFormat?.videoSupportedFrameRateRanges.first?.minFrameDuration{
camera.activeVideoMinFrameDuration = frameDuration
}
// Unlock the configuration you locked
camera.unlockForConfiguration()
}catch{
fatalError(error.localizedDescription)
}
session.commitConfiguration()
}
#IBAction func startStopRecording(_ sender: Any) {
if isRecording{
stopRecording()
} else {
startRecording()
}
}
func startRecording(){
timeLabel.isHidden = false
timer = Timer.scheduledTimer(timeInterval: 1, target: self, selector: #selector(ViewController.timerAction), userInfo: nil, repeats: true)
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
let flagTime = Timestamp
let auxStr = flagTime+"_output.mp4"
let fileUrl = paths[0].appendingPathComponent(auxStr)
depthCapture.prepareForRecording(timeFlag: flagTime)
movieOutput.startRecording(to: fileUrl, recordingDelegate: self)
print(fileUrl.absoluteString)
print("Recording started")
self.isRecording = true
}
func stopRecording(){
timeLabel.isHidden = true
timer.invalidate()
time = 0
timeLabel.text = "0"
movieOutput.stopRecording()
print("Stopped recording!")
self.isRecording = false
do {
try depthCapture.finishRecording(success: { (url: URL) -> Void in
print(url.absoluteString)
})
} catch {
print("Error while finishing depth capture.")
}
}
#objc func timerAction() {
time += 1
timeLabel.text = String(time)
}
#IBAction func previeModeChanged(_ sender: UISegmentedControl) {
previewMode = PreviewMode(rawValue: previewModeControl.selectedSegmentIndex) ?? .original
}
#IBAction func switchCamera(_ sender: Any) {
let currentDevice = self.inputDevice.device
let currentPosition = currentDevice.position
let preferredPosition: AVCaptureDevice.Position
let preferredDeviceType: AVCaptureDevice.DeviceType
let devices = self.videoDeviceDiscoverySession.devices
var newVideoDevice: AVCaptureDevice? = nil
switch currentPosition {
case .unspecified, .front:
preferredPosition = .back
preferredDeviceType = .builtInDualCamera
case .back:
preferredPosition = .front
preferredDeviceType = .builtInTrueDepthCamera
#unknown default:
preferredPosition = .back
preferredDeviceType = .builtInDualCamera
}
// First, seek a device with both the preferred position and device type. Otherwise, seek a device with only the preferred position. ENTENDER MEJOR LQS CONDICIONES
if let device = devices.first(where: { $0.position == preferredPosition && $0.deviceType == preferredDeviceType }) {
newVideoDevice = device
} else if let device = devices.first(where: { $0.position == preferredPosition }) {
newVideoDevice = device
}
if let videoDevice = newVideoDevice {
do {
let cameraInput = try AVCaptureDeviceInput(device: videoDevice)
self.session.beginConfiguration()
self.session.removeInput(self.inputDevice)
if self.session.canAddInput(cameraInput) {
session.sessionPreset = .vga640x480
self.session.addInput(cameraInput)
self.inputDevice = cameraInput
}else {
self.session.addInput(self.inputDevice)
}
self.session.commitConfiguration()
} catch{
print("Error occurred while creating video device input: \(error)")
}
}
}
}
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate{
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)
let image = CIImage(cvPixelBuffer: pixelBuffer!)
let previewImage: CIImage
switch previewMode {
case .original:
previewImage = image
case .depth:
previewImage = depthMap ?? image
}
let displayImage = UIImage(ciImage: previewImage)
DispatchQueue.main.async {[weak self] in self?.previewView.image = displayImage}
}
}
extension ViewController: AVCaptureDepthDataOutputDelegate{
func depthDataOutput(_ output: AVCaptureDepthDataOutput, didOutput depthData: AVDepthData, timestamp: CMTime, connection: AVCaptureConnection) {
var convertedDepth: AVDepthData
// Ensure the depth data is the format you need: 32 bit FP disparity.???
if depthData.depthDataType != kCVPixelFormatType_DepthFloat16{
convertedDepth = depthData.converting(toDepthDataType: kCVPixelFormatType_DepthFloat32)
}else{
convertedDepth = depthData
}
// You save the depth data map from the AVDepthData object as a CVPixelBuffer
let pixelBuffer = convertedDepth.depthDataMap
//Using an extension, you then clamp the pixels in the pixel buffer to keep them between 0.0 and 1.0.
pixelBuffer.clamp()
// Convert the pixel buffer into a CIImage
let depthMap = CIImage(cvPixelBuffer: pixelBuffer)
// You store depthMap in a class variable for later use
DispatchQueue.main.async {
[weak self] in self?.depthMap = depthMap
}
}
}

Converting AVAudioPCMBuffer to another AVAudioPCMBuffer

I am trying to convert a determined AVAudioPCMBuffer (44.1khz, 1ch, float32, not interleaved) to another AVAudioPCMBuffer (16khz, 1ch, int16, not interleaved) using AVAudioConverter and write it using AVAudioFile.
My code uses the library AudioKit together with the tap AKLazyTap to get a buffer each determined time, based on this source:
https://github.com/AudioKit/AudioKit/tree/master/AudioKit/Common/Taps/Lazy%20Tap
Here is my implementation:
lazy var downAudioFormat: AVAudioFormat = {
let avAudioChannelLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
return AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: 16000,
interleaved: false,
channelLayout: avAudioChannelLayout)
}()
//...
AKSettings.sampleRate = 44100
AKSettings.numberOfChannels = AVAudioChannelCount(1)
AKSettings.ioBufferDuration = 0.002
AKSettings.defaultToSpeaker = true
//...
let mic = AKMicrophone()
let originalAudioFormat: AVAudioFormat = mic.avAudioNode.outputFormat(forBus: 0) //41.100, 1ch, float32...
let inputFrameCapacity = AVAudioFrameCount(1024)
//I don't think this is correct, the audio is getting chopped...
//How to calculate it correctly?
let outputFrameCapacity = AVAudioFrameCount(512)
guard let inputBuffer = AVAudioPCMBuffer(
pcmFormat: originalAudioFormat,
frameCapacity: inputFrameCapacity) else {
fatalError()
}
// Your timer should fire equal to or faster than your buffer duration
bufferTimer = Timer.scheduledTimer(
withTimeInterval: AKSettings.ioBufferDuration/2,
repeats: true) { [weak self] _ in
guard let unwrappedSelf = self else {
return
}
unwrappedSelf.lazyTap?.fillNextBuffer(inputBuffer, timeStamp: nil)
// This is important, since we're polling for samples, sometimes
//it's empty, and sometimes it will be double what it was the last call.
if inputBuffer.frameLength == 0 {
return
}
//This converter is only create once, as the AVAudioFile. Ignore this code I call a function instead.
let converter = AVAudioConverter(from: originalAudioFormat, to: downAudioFormat)
converter.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Normal
converter.sampleRateConverterQuality = .min
converter.bitRateStrategy = AVAudioBitRateStrategy_Constant
guard let outputBuffer = AVAudioPCMBuffer(
pcmFormat: converter.outputFormat,
frameCapacity: outputFrameCapacity) else {
print("Failed to create new buffer")
return
}
let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return inputBuffer
}
var error: NSError?
let status: AVAudioConverterOutputStatus = converter.convert(
to: outputBuffer,
error: &error,
withInputFrom: inputBlock)
switch status {
case .error:
if let unwrappedError: NSError = error {
print(unwrappedError)
}
return
default: break
}
//Only created once, instead of this code my code uses a function to verify if the AVAudioFile has been created, ignore it.
outputAVAudioFile = try AVAudioFile(
forWriting: unwrappedCacheFilePath,
settings: format.settings,
commonFormat: format.commonFormat,
interleaved: false)
do {
try outputAVAudioFile?.write(from: avAudioPCMBuffer)
} catch {
print(error)
}
}
(Please note that AVAudioConverter and AVAudioFile are being reused, the initialization there doesn't represent the real implementation on my code, just to simplify and make it more simple to understand.)
With frameCapacity on the outputBuffer: AVAudioPCMBuffer set to 512, the audio get chopped. Is there any way to discovery the correct frameCapacity for this buffer?
Written using Swift 4 and AudioKit 4.1.
Many thanks!
I managed to solve this problem installing a Tap on the inputNode like this:
lazy var downAudioFormat: AVAudioFormat = {
let avAudioChannelLayout = AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!
return AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: SAMPLE_RATE,
interleaved: true,
channelLayout: avAudioChannelLayout)
}()
private func addBufferListener(_ avAudioNode: AVAudioNode) {
let originalAudioFormat: AVAudioFormat = avAudioNode.inputFormat(forBus: 0)
let downSampleRate: Double = downAudioFormat.sampleRate
let ratio: Float = Float(originalAudioFormat.sampleRate)/Float(downSampleRate)
let converter: AVAudioConverter = buildConverter(originalAudioFormat)
avAudioNode.installTap(
onBus: 0,
bufferSize: AVAudioFrameCount(downSampleRate * 2),
format: originalAudioFormat,
block: { (buffer: AVAudioPCMBuffer!, _ : AVAudioTime!) -> Void in
let capacity = UInt32(Float(buffer.frameCapacity)/ratio)
guard let outputBuffer = AVAudioPCMBuffer(
pcmFormat: self.downAudioFormat,
frameCapacity: capacity) else {
print("Failed to create new buffer")
return
}
let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer
}
var error: NSError?
let status: AVAudioConverterOutputStatus = converter.convert(
to: outputBuffer,
error: &error,
withInputFrom: inputBlock)
switch status {
case .error:
if let unwrappedError: NSError = error {
print("Error \(unwrappedError)"))
}
return
default: break
}
self.delegate?.flushAudioBuffer(outputBuffer)
})
}

Swift 3 OutputStream hanging?

I wrote a simple Swift program to write to a server; I want to send data back to back or in a loop to a server; I tried doing something simple where I can simply send a message back to back (two pieces of data) but there is a hang; that is, the server only receives the first message but not the second message; why is this?
I am using StreamDelegate in Swift 3 and I am using OutputStream write method
print("Hello, World!")
import Foundation
class Connection: NSObject, StreamDelegate {
private var inputStream: InputStream!
private var outputStream: OutputStream!
private var my_address: CFString
private var my_port: UInt32
init (address: CFString, port:UInt32) {
self.my_address = address
self.my_port = port
super.init()
}
func connect() {
print("connecting...")
var readStream: Unmanaged<CFReadStream>?
var writeStream: Unmanaged<CFWriteStream>?
CFStreamCreatePairWithSocketToHost(nil, self.my_address, self.my_port, &readStream, &writeStream)
// Documentation suggests readStream and writeStream can be assumed to
// be non-nil. It might be wise to test if either is nil
// and implement error-handling as needed.
// self.inputStream = readStream!.takeRetainedValue()
self.outputStream = writeStream!.takeRetainedValue()
// self.inputStream.delegate = self
self.outputStream.delegate = self
// self.inputStream.schedule(in: RunLoop.current, forMode: RunLoopMode.defaultRunLoopMode)
// self.outputStream.schedule(in: RunLoop.current, forMode: RunLoopMode.defaultRunLoopMode)
// self.inputStream.open()
self.outputStream.open()
}
func disconnect() {
self.inputStream.close()
self.outputStream.close()
}
func stream(aStream: Stream, handleEvent eventCode: Stream.Event) {
switch (eventCode){
case Stream.Event.errorOccurred:
NSLog("ErrorOccurred")
case Stream.Event.endEncountered:
NSLog("EndEncountered")
case Stream.Event.hasBytesAvailable:
NSLog("HasBytesAvaible")
var buffer = [UInt8](repeating: 0, count: 4096)
while (inputStream.hasBytesAvailable){
var len = inputStream.read(&buffer, maxLength: buffer.count)
if(len > 0){
var output = NSString(bytes: &buffer, length: buffer.count, encoding: String.Encoding.utf8.rawValue)
if (output != ""){
NSLog("server said: %#", output!)
}
} else {
print("empty string from stream")
}
}
case Stream.Event.openCompleted:
NSLog("OpenCompleted")
case Stream.Event.hasSpaceAvailable:
NSLog("HasSpaceAvailable")
default: print("default reached. unknown stream event")
}
}
func sendMessage(message: String) {
let buff = [UInt8](message.utf8)
self.outputStream.write(buff, maxLength: buff.count)
}
func getOutputStream() -> OutputStream {
return self.outputStream
}
}
var socket = Connection(address: "000.000.0.000" as CFString, port: 0000)
socket.connect()
socket.sendMessage(message: "Hey you! 1")
socket.sendMessage(message: "Hey you! 2")
\
import Darwin
import Foundation
import UIKit
import Dispatch
class ViewController: UIViewController {
#IBOutlet private weak var joystickMove: Joystick!
#IBOutlet private weak var joystickRotate: Joystick!
private var joystick = Joystick()
private var contour = Contours()
private var contour_index: Int = 0
private var socket = Connection(address: "000.000.0.000" as CFString, port: 0000)
override func viewDidLoad() {
super.viewDidLoad()
createJoystick()
createContours()
createButton()
}
private func createJoystick() {
let n: CGFloat = 100.0
let x: CGFloat = (UIScreen.main.bounds.width/2) - (n/2.0)
let y: CGFloat = UIScreen.main.bounds.height - (UIScreen.main.bounds.height/4.0)
self.joystick.frame = CGRect(x: x, y: y, width: n, height: n)
self.joystick.backgroundColor = UIColor.clear
self.joystick.substrateColor = UIColor.lightGray
self.joystick.substrateBorderColor = UIColor.gray
self.joystick.substrateBorderWidth = 1.0
self.joystick.stickSize = CGSize(width: 50.0, height: 50.0)
self.joystick.stickColor = UIColor.darkGray
self.joystick.stickBorderColor = UIColor.black
self.joystick.stickBorderWidth = 2.0
self.joystick.fade = 0.5
var packet = ""
DispatchQueue.global(qos: .userInitiated).async { // do some task
self.joystick.trackingHandler = { (data) -> () in
let power = sqrt(pow(Double(data.velocity.x), 2.0) + pow(Double(data.velocity.y), 2.0))
let theta = atan2(Double(-data.velocity.y), Double(data.velocity.x))
let degrees = theta * (180.0 / M_PI)
if degrees >= 55 && degrees <= 125 { // move forward
packet = "\(1) \(1) \(power) \(power)"
} else if degrees >= -125 && degrees <= -55 { // move backwards
packet = "\(-1) \(-1) \(power) \(power)"
} else if degrees >= -55 && degrees <= 55 { // turn right
packet = "\(1) \(-1) \(power) \(power)"
} else if (degrees >= 125 && degrees <= 180) && (degrees >= -180 && degrees <= -125) { // turn left
packet = "\(-1) \(1) \(power) \(power)"
}
self.socket.connect()
let sent = self.socket.sendMessage(message: packet)
if sent {
print("packet sent!\n \(packet)")
}
self.socket.stream(aStream: self.socket.inputStream, handleEvent: Stream.Event.hasBytesAvailable)
// self.socket.changeWhenHasBytesAvailable = { str in
// print("my-str: \(str.characters)")
// }
self.socket.disconnect()
}
}
view.addSubview(joystick)
}
private func createContours() {
let n: CGFloat = 350.0
let x: CGFloat = (UIScreen.main.bounds.width/2.0) - (n/2.0)
let y: CGFloat = UIScreen.main.bounds.height - (UIScreen.main.bounds.height/4.0) - n - 100.0
self.contour.frame = CGRect(x: x, y: y, width: n, height: n)
self.contour.backgroundColor = UIColor.clear
view.addSubview(self.contour)
}
private func createButton() {
let width: CGFloat = 150.0
let height: CGFloat = 75.0
let x: CGFloat = (UIScreen.main.bounds.width/2.0) - (width/2.0)
let y: CGFloat = UIScreen.main.bounds.height - (UIScreen.main.bounds.height/4.0) - width
let button: UIButton = UIButton(frame: CGRect(x: x, y: y, width: width, height: height))
button.backgroundColor = UIColor.blue
button.setTitle("Contour Views", for: .normal)
button.addTarget(self, action: #selector(self.buttonAction), for: .touchUpInside)
button.tag = 1
view.addSubview(button)
}
#objc private func buttonAction(sender: UIButton!) {
var btnsendtag: UIButton = sender
if btnsendtag.tag == 1 {
self.contour_index = (self.contour_index + 1) % 2
switch self.contour_index {
case 0:
for index in 0...356 {
if self.contour.distx[index] != -1 && self.contour.disty[index] != -1 {
self.contour.circles[index].alpha = 0
}
}
case 1:
for index in 0...356 {
if self.contour.distx[index] != -1 && self.contour.disty[index] != -1 {
self.contour.circles[index].alpha = 1
}
}
default:
for index in 0...356 {
if self.contour.distx[index] != -1 && self.contour.disty[index] != -1 {
self.contour.circles[index].alpha = 1
UIColor.cyan.setFill()
self.contour.lines[index].fill()
self.contour.lines[index].stroke()
}
}
}
}
}
override func viewDidAppear(_ animated: Bool) {
DispatchQueue.global(qos: .background).async { // do some task
// self.socket.connect()
// var packet = "1 1 0 0"
// let sent = self.socket.sendMessage(message: packet)
// if sent {
// print("packet sent!\n \(packet)")
// }
// self.socket.disconnect()
}
}
public func delayWithSeconds(_ seconds: Double, completion: #escaping () -> ()) {
DispatchQueue.main.asyncAfter(deadline: .now() + seconds) {
completion()
}
}
override func didReceiveMemoryWarning() {
super.didReceiveMemoryWarning()
// Dispose of any resources that can be recreated.
}
override func viewWillDisappear(_ animated: Bool) {
// self.socket.disconnect()
}
}
\
import Foundation
class Connection: NSObject, StreamDelegate {
public var inputStream: InputStream!
private var outputStream: OutputStream!
private var my_address: CFString
private var my_port: UInt32
public var changeWhenHasBytesAvailable: ((String)->())?
init (address: CFString, port:UInt32) {
self.my_address = address
self.my_port = port
super.init()
}
public func connect() {
var readStream: Unmanaged<CFReadStream>?
var writeStream: Unmanaged<CFWriteStream>?
CFStreamCreatePairWithSocketToHost(nil, self.my_address, self.my_port, &readStream, &writeStream)
self.inputStream = readStream!.takeRetainedValue()
self.outputStream = writeStream!.takeRetainedValue()
self.inputStream.delegate = self
self.outputStream.delegate = self
self.inputStream.schedule(in: RunLoop.current, forMode: RunLoopMode.defaultRunLoopMode)
self.outputStream.schedule(in: RunLoop.current, forMode: RunLoopMode.defaultRunLoopMode)
self.inputStream.open()
self.outputStream.open()
}
public func stream(aStream: Stream, handleEvent eventCode: Stream.Event) {
// DispatchQueue.global(qos: .userInitiated).async { // do some task
print("stream")
if aStream === self.inputStream {
switch eventCode {
case Stream.Event.openCompleted:
print("open")
break
case Stream.Event.errorOccurred:
print("error")
break
case Stream.Event.hasBytesAvailable:
print("available!")
// var inputBuffer = Array<UInt8>(repeating: 0, count:2048)
// self.inputStream?.read(&inputBuffer, maxLength: 2048)
// self.changeWhenHasBytesAvailable?(String(bytes: inputBuffer, encoding: String.Encoding.utf8)!)
var buffer = [UInt8](repeating: 0, count: 512)
while (self.inputStream.hasBytesAvailable){
print("before read")
var len = self.inputStream.read(&buffer, maxLength: buffer.count)
print("after read")
if(len > 0){
// var output = NSString(bytes: &buffer, length: buffer.count, encoding: String.Encoding.utf8.rawValue)
var output = NSString(bytes: &buffer, length: len, encoding: String.Encoding.utf8.rawValue)
if (output != ""){
print("output: \(output!)")
}
} else {
print("empty string from stream")
}
print("while")
}
break
default:
print("default")
break
}
}
// }
}
public func sendMessage(message: String) -> Bool {
var sent: Bool = false
let buff = [UInt8](message.utf8)
let result = self.outputStream.write(buff, maxLength: buff.count)
if result != -1 {
sent = true
}
return sent
}
public func disconnect() {
self.inputStream.close()
self.outputStream.close()
// self.inputStream.remove(from: RunLoop.current, forMode: RunLoopMode.defaultRunLoopMode)
// self.outputStream.remove(from: RunLoop.current, forMode: RunLoopMode.defaultRunLoopMode)
}
}
update; send works! but not receiving data?
Don't use a while inside the StreamDelegate when you are reading:
while (inputStream.hasBytesAvailable){
var len = inputStream.read(&buffer, maxLength: buffer.count)
...
}
Just call read. If there are more bytes, your delegate will be called again.