I am trying to track a face using VNFaceObservation in an ARSCNView:
let response = observations.map({ (face) -> (observation: VNFaceObservation, image: CIImage, frame: ARFrame) in
self.lastObservation = VNDetectedObjectObservation(boundingBox: face.boundingBox)
Then I do:
/// Session callback that runs a Vision object-tracking request on the given frame.
///
/// Reads `self.lastObservation`, seeds a `VNTrackObjectRequest` with it, and performs
/// the request on `frame.capturedImage` through `self.visionSequenceHandler`.
/// The request's completion handler is `self.handleVisionRequestUpdate`.
///
/// - Parameters:
///   - session: The session reporting the update (not used in the visible lines).
///   - frame: The frame whose `capturedImage` is passed to Vision.
func session(_ session: ARSession, didUpdate frame: ARFrame) {
// NOTE(review): the `guard` keyword that pairs with the `else` below is not visible in this excerpt.
let lastObservation = self.lastObservation
else { return }
let request = VNTrackObjectRequest(detectedObjectObservation: lastObservation, completionHandler: self.handleVisionRequestUpdate)
request.trackingLevel = .accurate
do {
try self.visionSequenceHandler.perform([request], on: frame.capturedImage)
} catch {
// A failure from `perform` is only printed.
print("Throws: \(error)")
But the request result doesn't track the face and returns the wrong rect:
guard let newObservation = request.results?.first as? VNDetectedObjectObservation else { return }
I made this script in RealityKit & SwiftUI that, when a reference image is detected, overlays a video with the same filename on top of it.
/// `ARSessionDelegate` that reacts to added image anchors and builds a video plane
/// for a reference image.
class Coordinator: NSObject, ARSessionDelegate {
var parent: ARViewContainer
// One player stored on the coordinator. `createdVideoPlayerNodeFor(_:)` overwrites it
// every time it finds a matching video, so all planes created so far do not each keep
// their own player reference here.
var videoPlayer = AVPlayer()
init(parent: ARViewContainer) {
self.parent = parent
/// Called when anchors are added. Only `anchors[0]` is inspected, and only when it
/// is an `ARImageAnchor`; any further anchors in the array are ignored by the visible code.
func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
guard let validAnchor = anchors[0] as? ARImageAnchor else { return }
let anchor = AnchorEntity(anchor: validAnchor)
/// Builds a plane entity that plays the bundled `<target.name>.mp4` video.
///
/// - Parameter target: Reference image whose `name` selects the video and whose
///   `physicalSize` sets the plane's width and depth.
/// - Returns: A plane with a `VideoMaterial` when the name and bundle URL resolve;
///   otherwise the empty `ModelEntity` created at the top.
func createdVideoPlayerNodeFor(_ target: ARReferenceImage) -> ModelEntity {
var videoPlane = ModelEntity()
//var videoPlayer = AVPlayer()
if let targetName = target.name,
let validURL = Bundle.main.url(forResource: targetName, withExtension: "mp4") {
// Replaces the coordinator-level player with one for this target's video.
videoPlayer = AVPlayer(url: validURL)
let videoMaterial = VideoMaterial(avPlayer: videoPlayer)
videoPlane = ModelEntity(mesh: .generatePlane(width: Float(target.physicalSize.width), depth: Float(target.physicalSize.height)), materials: [videoMaterial])
print (target.name as Any)
return videoPlane
It works as intended, but I would like to pause the video when the reference image is not tracked (without pausing other videos that might be in the view at the same time). I tried experimenting with this, but it doesn't work - it either pauses all videos or doesn't work at all.
/// Called when anchors are updated. Looks only at `anchors[0]`, requires it to be an
/// `ARImageAnchor`, and branches on its `isTracked` flag.
/// NOTE(review): the bodies of both branches are not visible in this excerpt.
func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
guard let validAnchor = anchors[0] as? ARImageAnchor else { return }
if validAnchor.isTracked {
} else {
How do I target only the video that stops being tracked, please?
My Swift code should take a snapshot of a video and then display that image in a UIImageView. Instead of using an online link, I want the video source to be the UIView in my class, so the video URL should come from previewView rather than the https link that I have below. All the code below is in this class.
import UIKit;import AVFoundation
class ViewController: UIViewController, AVCapturePhotoCaptureDelegate {
#IBOutlet var previewView : UIView!
#IBOutlet var captureImageView : UIImageView!
var captureSession: AVCaptureSession!
var stillImageOutput: AVCapturePhotoOutput!
var videoPreviewLayer: AVCaptureVideoPreviewLayer!
/// Creates the capture session with the `.medium` preset, looks up the default video
/// device, and prepares a device input and photo output for it.
override func viewDidAppear(_ animated: Bool) {
// Setup your camera here...
captureSession = AVCaptureSession()
captureSession.sessionPreset = .medium
guard let backCamera = AVCaptureDevice.default(for: AVMediaType.video)
else {
print("Unable to access back camera!")
do {
let input = try AVCaptureDeviceInput(device: backCamera)
//Step 9
// NOTE(review): the next two lines are identical; the first assignment is immediately overwritten.
stillImageOutput = AVCapturePhotoOutput()
stillImageOutput = AVCapturePhotoOutput()
// The body of this check is not visible in this excerpt.
if captureSession.canAddInput(input) && captureSession.canAddOutput(stillImageOutput) {
catch let error {
print("Error Unable to initialize back camera: \(error.localizedDescription)")
/// Creates the preview layer for `captureSession`, configures its gravity and
/// orientation, and sizes it to `previewView`.
func setupLivePreview() {
videoPreviewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
videoPreviewLayer.videoGravity = .resizeAspect
videoPreviewLayer.connection?.videoOrientation = .portrait
// Work is dispatched to a global user-initiated queue first; what runs there before
// the hop back to main is not visible in this excerpt.
DispatchQueue.global(qos: .userInitiated).async { //[weak self] in
//Step 13
// The layer frame is set from `previewView.bounds` on the main queue.
DispatchQueue.main.async {
self.videoPreviewLayer.frame = self.previewView.bounds
#IBAction func startRecord(_ sender: Any) {
#IBAction func Save(_ sender: Any) {
//what do I put in the 2 highlighted blocks
let videoURL = "https://www.youtube.com/watch?v=Txt25dw-lIk"
self.getThumbnailFromUrl(videoURL) { [weak self] (img) in
guard let _ = self else { return }
if let img = img {
self?.captureImageView.image = img
/// Generates a thumbnail image from the video at `url`.
///
/// - Parameters:
///   - url: String form of the video URL; `nil` is treated as an empty string.
///   - completion: Callback intended to receive the thumbnail.
///     NOTE(review): no call to `completion` is visible in this excerpt — confirm it is
///     invoked on success, on failure, and on the early return below.
func getThumbnailFromUrl(_ url: String?, _ completion: #escaping ((_ image: UIImage?)->Void)) {
// Returns silently when the string cannot be turned into a URL.
guard let url = URL(string: url ?? "") else { return }
// NOTE(review): the asset loading and image generation below run inside a main-queue block.
DispatchQueue.main.async {
let asset = AVAsset(url: url)
let assetImgGenerate = AVAssetImageGenerator(asset: asset)
assetImgGenerate.appliesPreferredTrackTransform = true
// Requested frame time: value 2 at timescale 1.
let time = CMTimeMake(value: 2, timescale: 1)
do {
let img = try assetImgGenerate.copyCGImage(at: time, actualTime: nil)
let thumbnail = UIImage(cgImage: img)
} catch {
print("Error :: ", error.localizedDescription)
#IBAction func didTakePhoto(_ sender: Any) {
let settings = AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg])
stillImageOutput.capturePhoto(with: settings, delegate: self)
/// Photo-capture callback: converts the captured photo's file data into a `UIImage`
/// and assigns it to `captureImageView`.
///
/// The `error` parameter is not inspected in the visible lines.
func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
// Nothing is displayed when the photo has no file data representation.
guard let imageData = photo.fileDataRepresentation()
else { return }
let image = UIImage(data: imageData)
captureImageView.image = image
override func viewWillDisappear(_ animated: Bool) {
When I try to run my camera I get an error message that says the following:
terminating with uncaught exception of type NSException and "Multiple audio/video AVCaptureInputs are not currently supported"
I have been looking for solutions on Stack Overflow but haven't been successful.
I tried to reconnect my outlet, but I can't understand where the problem occurs. Therefore I tried to set some breakpoints to find it, but didn't manage to.
let captureSession = AVCaptureSession()
var previewLayer:CALayer!
var captureDevice:AVCaptureDevice!
var takePhoto = false
override func viewDidLoad() {
override func viewWillAppear(_ animated: Bool) {
func prepareCamera() {
captureSession.sessionPreset = AVCaptureSession.Preset.photo
let availableDevices = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .back).devices
captureDevice = availableDevices.first
func beginSession () {
do {
let captureDeviceInput = try AVCaptureDeviceInput(device: captureDevice)
}catch {
let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
self.previewLayer = previewLayer
self.previewLayer.frame = self.view.layer.frame
let dataOutput = AVCaptureVideoDataOutput()
dataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString):NSNumber(value:kCVPixelFormatType_32BGRA)] as [String : Any]
dataOutput.alwaysDiscardsLateVideoFrames = true
if captureSession.canAddOutput(dataOutput) {
let queue = DispatchQueue(label: "com.brianadvent.captureQueue")
dataOutput.setSampleBufferDelegate(self, queue: queue)
#IBAction func takePhoto(_ sender: Any) {
takePhoto = true
/// Sample-buffer callback that grabs one frame when `takePhoto` is set and presents
/// it in the "PhotoVC" view controller.
func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
if takePhoto {
// Reset the flag first so only one frame is captured per request.
takePhoto = false
if let image = self.getImageFromSampleBuffer(buffer: sampleBuffer) {
// Force-casts the storyboard controller to `Viewcontroller2`.
let photoVC = UIStoryboard(name: "Main", bundle: nil).instantiateViewController(withIdentifier: "PhotoVC") as! Viewcontroller2
photoVC.takenPhoto = image
// Presentation is dispatched to the main queue.
DispatchQueue.main.async {
self.present(photoVC, animated: true, completion: {
/// Converts a sample buffer into a `UIImage`.
///
/// - Parameter buffer: Sample buffer to read the image buffer from.
/// - Returns: An image built from the buffer's full pixel area, using the main screen's
///   scale and `.right` orientation, or `nil` (see the final `return nil`).
func getImageFromSampleBuffer (buffer:CMSampleBuffer) -> UIImage? {
if let pixelBuffer = CMSampleBufferGetImageBuffer(buffer) {
let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
// A new `CIContext` is created on every call.
let context = CIContext()
// Render rectangle covers the whole pixel buffer.
let imageRect = CGRect(x: 0, y: 0, width: CVPixelBufferGetWidth(pixelBuffer), height: CVPixelBufferGetHeight(pixelBuffer))
if let image = context.createCGImage(ciImage, from: imageRect) {
return UIImage(cgImage: image, scale: UIScreen.main.scale, orientation: .right)
return nil
func stopCaptureSession () {
if let inputs = captureSession.inputs as? [AVCaptureDeviceInput] {
for input in inputs {
override func didReceiveMemoryWarning() {
struct Constants {
static let apiKey = "AIzaSyDtaJ5eU24rbnHsG9pb1STOizDJvqcaj5E"
static let bundleId = "com.felibundle"
static let searchEngineId = "016628067786358079133:2gm9usqzouc"
#IBAction func pish(_ sender: Any) {
googleSearch(term: "George Bush") { results in
/// Sends a GET request to the Google custom-search URL and passes the parsed
/// `(title, url)` pairs to `callback`.
///
/// - Parameters:
///   - term: Search term.
///   - callback: Receives the mapped items from the JSON response's `"items"` array.
func googleSearch(term: String, callback:#escaping ([(title: String, url: String)]?) -> Void) {
// NOTE(review): the format string contains no placeholders, so `term`,
// `Constants.searchEngineId` and `Constants.apiKey` are not inserted into the URL.
let urlString = String(format: "https://cse.google.com/cse?cx=016628067786358079133:2gm9usqzouc", term, Constants.searchEngineId, Constants.apiKey)
let encodedUrl = urlString.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed)
// Falls back to the unencoded string when percent-encoding returns nil.
guard let url = URL(string: encodedUrl ?? urlString) else {
print("invalid url \(urlString)")
let request = NSMutableURLRequest(url: url, cachePolicy: .useProtocolCachePolicy, timeoutInterval: 10)
request.httpMethod = "GET"
request.setValue(Constants.bundleId, forHTTPHeaderField: "X-Ios-Bundle-Identifier")
let session = URLSession.shared
let datatask = session.dataTask(with: request as URLRequest) { (data, response, error) in
// Conditions of a guard whose keyword is not visible in this excerpt:
// no error, non-nil data, and a JSON dictionary at the top level.
error == nil,
let data = data,
let json = try? JSONSerialization.jsonObject(with: data, options: .allowFragments) as? [String : Any]
else {
guard let items = json?["items"] as? [[String : Any]], items.count > 0 else {
print("no results")
// Force-casts "title" and "formattedUrl" to String for every item.
callback(items.map { ($0["title"] as! String, $0["formattedUrl"] as! String) })
Your code runs fine at my end.
However, this kind of error occurs when you try to add multiple input devices to the same session. Make sure you are not adding another AVCaptureInput object elsewhere in your project.
I'd like to add a filter to each frame I record in real time and display the filtered image in a UIImageView; if anyone could help, it would be nice.
But captureOutput is never called. Here is my code:
class Measurement: UIViewController , AVCaptureVideoDataOutputSampleBufferDelegate {
#IBOutlet weak var cameraPreview: UIView!
#IBOutlet weak var imageView: UIImageView!
override func viewDidLoad() {
toggleTorch(on: true)
override func viewDidAppear(_ animated: Bool) {
lazy var cameraSession: AVCaptureSession = {
let s = AVCaptureSession()
s.sessionPreset = AVCaptureSession.Preset.low
return s
lazy var previewLayer: AVCaptureVideoPreviewLayer = {
let preview = AVCaptureVideoPreviewLayer(session: self.cameraSession)
preview.position = CGPoint(x:182,y: 485)
preview.videoGravity = AVLayerVideoGravity.resizeAspectFill
preview.connection?.videoOrientation = AVCaptureVideoOrientation.portrait
preview.bounds = imageView.bounds
//preview.position = CGPoint(x:self.view.bounds.midX,y: self.view.bounds.midY)
return preview
/// Switches the default video device's torch on or off.
///
/// - Parameter on: `true` sets `torchMode` to `.on`, `false` to `.off`.
///
/// Does nothing when there is no default video device; prints a message when the
/// device has no torch or when locking for configuration throws.
func toggleTorch(on: Bool) {
guard let device = AVCaptureDevice.default(for: .video) else { return }
if device.hasTorch {
do {
// NOTE(review): no matching `unlockForConfiguration()` call is visible in this excerpt.
try device.lockForConfiguration()
if on == true {
device.torchMode = .on
} else {
device.torchMode = .off
} catch {
print("Torch could not be used")
} else {
print("Torch is not available")
func setupCameraSession() {
let captureDevice = AVCaptureDevice.default(for: AVMediaType.video)
do {
let deviceInput = try AVCaptureDeviceInput(device: captureDevice!)
if (cameraSession.canAddInput(deviceInput) == true) {
print("Processing Data.")
let dataOutput = AVCaptureVideoDataOutput()
dataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA as UInt32)] as [String : AnyObject]
dataOutput.alwaysDiscardsLateVideoFrames = true
print("Processing Data.")
if (cameraSession.canAddOutput(dataOutput) == true) {
print("Processing Data.")
let queue = DispatchQueue(label: "com.invasivecode.videoQueue")
dataOutput.setSampleBufferDelegate(self, queue: queue)
catch let error as NSError {
print("\(error), \(error.localizedDescription)")
/// Sample-buffer callback that applies a sepia filter to the frame and shows the
/// result in `imageView`.
///
/// NOTE(review): this uses the `didOutputSampleBuffer` spelling with implicitly unwrapped
/// parameters, while another snippet in this file declares
/// `captureOutput(_:didOutput:from:)` — confirm which signature the SDK in use calls.
func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
print("Processing Data.")
guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
//let chromaKeyFilter = colorCubeFilterForChromaKey(hueAngle: 120)
let ciImage = CIImage(cvPixelBuffer: imageBuffer)
// A new `CIContext` is created for every frame.
let context = CIContext()
// The frame is rendered to a CGImage/UIImage and then converted back to a CIImage below.
guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else { return }
let image = UIImage(cgImage: cgImage)
// Despite the variable name, the filter created here is "CISepiaTone".
if let chromaKeyFilter = CIFilter(name: "CISepiaTone") {
let beginImage = CIImage(image: image)
chromaKeyFilter.setValue(beginImage, forKey: kCIInputImageKey)
chromaKeyFilter.setValue(0.5, forKey: kCIInputIntensityKey)
if let output = chromaKeyFilter.outputImage {
if let cgimg = context.createCGImage(output, from: output.extent) {
let processedImage = UIImage(cgImage: cgimg)
// do something interesting with the processed image
// NOTE(review): the image view is assigned directly inside this callback; no
// dispatch to the main queue is visible here — confirm which queue this runs on.
imageView.image = processedImage
func captureOutput(_ captureOutput: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Here you can count how many frames are dopped
func startCapture() {
print("\(self.classForCoder)/" + #function)
if cameraSession.isRunning {
print("already running")
toggleTorch(on: true)
You need to set the delegate
dataOutput.sampleBufferDelegate = self
I'm trying to use the new Apple Vision API to detect a barcode from an image and return its details. I've successfully detected a QR code and returned a message using the CIDetector. However, I can't make this work for one-dimensional barcodes. Here's an example result:
import UIKit
import Vision
/// Detects barcodes in a still image with Vision and prints details for QR codes.
class BarcodeDetector {
/// Runs a `VNDetectBarcodesRequest` on `source` and logs each barcode found.
///
/// - Parameters:
///   - source: Image to scan; its `cgImage` is force-unwrapped for the request handler.
///   - complete: Callback taking a `UIImage`; its invocation is not visible in this excerpt.
func recognizeBarcode(for source: UIImage,
complete: #escaping (UIImage) -> Void) {
var resultImage = source
let detectBarcodeRequest = VNDetectBarcodesRequest { (request, error) in
if error == nil {
if let results = request.results as? [VNBarcodeObservation] {
print("Number of Barcodes found: \(results.count)")
if results.count == 0 { print("\r") }
var barcodeBoundingRects = [CGRect]()
for barcode in results {
// Strips the "VNBarcodeSymbology" prefix from the symbology's raw value to get a short type name.
let barcodeType = String(barcode.symbology.rawValue)?.replacingOccurrences(of: "VNBarcodeSymbology", with: "")
print("-Barcode Type: \(barcodeType!)")
if barcodeType == "QR" {
let image = CIImage(image: source)
// NOTE(review): the result of `cropping(to:)` is discarded, so the uncropped
// image is what gets passed on the next line.
image?.cropping(to: barcode.boundingBox)
self.qrCodeDescriptor(qrCode: barcode, qrCodeImage: image!)
resultImage = self.drawOnImage(source: resultImage, barcodeBoundingRects: barcodeBoundingRects)
} else {
let vnImage = VNImageRequestHandler(cgImage: source.cgImage!, options: [:])
// Errors thrown by `perform` are discarded by `try?`.
try? vnImage.perform([detectBarcodeRequest])
/// Prints descriptor details for a QR observation and decodes its message.
/// Does nothing when the observation's descriptor is not a `CIQRCodeDescriptor`.
private func qrCodeDescriptor(qrCode: VNBarcodeObservation, qrCodeImage: CIImage) {
if let description = qrCode.barcodeDescriptor as? CIQRCodeDescriptor {
readQRCode(qrCodeImage: qrCodeImage)
print(" -Payload: \(description.errorCorrectedPayload)")
print(" -Mask Pattern: \(description.maskPattern)")
print(" -Symbol Version: \(description.symbolVersion)\n")
/// Runs a high-accuracy QR `CIDetector` over `qrCodeImage` and prints the
/// concatenated message strings of all detected features.
private func readQRCode(qrCodeImage: CIImage) {
// Detector creation is force-unwrapped.
let detector: CIDetector = CIDetector(ofType: CIDetectorTypeQRCode, context: nil, options: [CIDetectorAccuracy: CIDetectorAccuracyHigh])!
var qrCodeLink = ""
let features = detector.features(in: qrCodeImage)
// Force-casts every feature to `CIQRCodeFeature`.
for feature in features as! [CIQRCodeFeature] {
if let messageString = feature.messageString {
qrCodeLink += messageString
if qrCodeLink == "" {
print(" -No Code Message")
} else {
print(" -Code Message: \(qrCodeLink)")
How can I convert the image into an AVMetadataObject and then read it from there? Or is there a better approach?
Swift 4.1, using the Vision Framework (No 3rd party stuff or Pods)
Try this. It works for QR and for other types (Code39 in this example):
func startDetection() {
let request = VNDetectBarcodesRequest(completionHandler: self.detectHandler)
request.symbologies = [VNBarcodeSymbology.code39] // or use .QR, etc
self.requests = [request]
func detectHandler(request: VNRequest, error: Error?) {
guard let observations = request.results else {
//print("no result")
let results = observations.map({$0 as? VNBarcodeObservation})
for result in results {
And then in:
/// Sample-buffer callback that runs the stored Vision requests on each frame.
///
/// Builds a `VNImageRequestHandler` from the frame's pixel buffer, forwarding the camera
/// intrinsics attachment when the sample buffer carries one, and performs `self.requests`.
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// The body of this guard's `else` is not visible in this excerpt.
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
var requestOptions:[VNImageOption:Any] = [:]
if let camData = CMGetAttachment(sampleBuffer, kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, nil) {
requestOptions = [.cameraIntrinsics:camData]
// Orientation raw value 6 is force-unwrapped — presumably `.right`; TODO confirm.
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: CGImagePropertyOrientation(rawValue: 6)!, options: requestOptions)
do {
try imageRequestHandler.perform(self.requests)
} catch {
The rest of the implementation is the regular AVCaptureDevice and AVCaptureSession setup. You will also need to conform to AVCaptureVideoDataOutputSampleBufferDelegate.
import AVFoundation
import Vision
var captureDevice: AVCaptureDevice!
var session = AVCaptureSession()
var requests = [VNRequest]()
func viewDidLoad() {
func setupVideo() {
session.sessionPreset = AVCaptureSession.Preset.photo
captureDevice = AVCaptureDevice.default(for: AVMediaType.video)
let deviceInput = try! AVCaptureDeviceInput(device: captureDevice!)
let deviceOutput = AVCaptureVideoDataOutput()
deviceOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32BGRA)]
deviceOutput.setSampleBufferDelegate(self, queue: DispatchQueue.global(qos: DispatchQoS.QoSClass.default))
let imageLayer = AVCaptureVideoPreviewLayer(session: session)
imageLayer.frame = imageView.bounds