How to properly place SCNNode on top of the QR Code? - swift

I want to detect QR codes in the vertical plane and place a node on top of the detected QR. For QR detection, I used Vision framework and Arkit to place the nodes as below code. Whenever placing the node, it is not attached to the QR code and placed somewhere else.
Could someone help to figure out what I have done wrong?
class ViewController: UIViewController, ARSCNViewDelegate,ARSessionDelegate{
#IBOutlet var sceneView: ARSCNView!
var qrRequests = [VNRequest]()
var detectedDataAnchor: ARAnchor?
var processing = false
let configuration = ARWorldTrackingConfiguration()
override func viewDidLoad() {
super.viewDidLoad()
self.sceneView.delegate = self
self.sceneView.session.delegate = self
self.sceneView.session.run(configuration)
startQrCodeDetection()
}
func startQrCodeDetection() {
let request = VNDetectBarcodesRequest(completionHandler: self.requestHandler)
self.qrRequests = [request]
}
public func session(_ session: ARSession, didUpdate frame: ARFrame) {
DispatchQueue.global(qos: .userInitiated).async {
do {
if self.processing {
return
}
self.processing = true
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: frame.capturedImage,
options: [:])
try imageRequestHandler.perform(self.qrRequests)
} catch {
}
}
}
func requestHandler(request: VNRequest, error: Error?) {
if let results = request.results, let result = results.first as? VNBarcodeObservation {
guard let payload = result.payloadStringValue else {return}
var rect = result.boundingBox
let center = CGPoint(x: rect.midX, y: rect.midY)
DispatchQueue.main.async {
self.hitTestQrCode(center: center)
self.processing = false
}
} else {
self.processing = false
}
}
func hitTestQrCode(center: CGPoint) {
print("Hit Test")
if let hitTestResults = self.sceneView?.hitTest(center, types: [.featurePoint, .existingPlaneUsingExtent] ),
let hitTestResult = hitTestResults.first {
if let detectedDataAnchor = self.detectedDataAnchor,
let node = self.sceneView.node(for: detectedDataAnchor) {
node.transform = SCNMatrix4(hitTestResult.worldTransform)
} else {
self.detectedDataAnchor = ARAnchor(transform: hitTestResult.worldTransform)
self.sceneView.session.add(anchor: self.detectedDataAnchor!)
}
}
}
func renderer(_ renderer: SCNSceneRenderer, nodeFor anchor: ARAnchor) -> SCNNode? {
if self.detectedDataAnchor?.identifier == anchor.identifier {
let sphere = SCNSphere(radius: 0.02)
sphere.firstMaterial?.diffuse.contents = UIColor.red
let sphereNode = SCNNode(geometry: sphere)
sphereNode.transform = SCNMatrix4(anchor.transform)
return sphereNode
}
return nil
}
}

Related

ARSession CurrentFrame is missing the AR interpretation Model Entities

I have the following ARView:
import SwiftUI
import UIKit
import RealityKit
import ARKit
struct ARViewContainer: UIViewRepresentable {
#EnvironmentObject var selectedFood: SelectedFood
#EnvironmentObject var arSession: ARSessionObservable
func makeCoordinator() -> Coordinator {
Coordinator(self)
}
func makeUIView(context: Context) -> ARView {
let arView = ARView(frame: .zero)
let config = ARWorldTrackingConfiguration()
config.planeDetection = [.vertical, .horizontal]
config.environmentTexturing = .automatic
if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
config.sceneReconstruction = .mesh
}
arView.session.delegate = context.coordinator
arView.session.run(config)
arSession.session = arView.session
return arView
}
func updateUIView(_ uiView: ARView, context: Context) {
if (!selectedFood.food.image.isEmpty) {
let data = try! Data(contentsOf: URL(string: self.selectedFood.food.image)!)
let fileURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
try! data.write(to: fileURL)
do {
let texture = try TextureResource.load(contentsOf: fileURL)
var material = SimpleMaterial()
material.baseColor = MaterialColorParameter.texture(texture)
material.tintColor = UIColor.white.withAlphaComponent(0.99)
let entity = ModelEntity(mesh: .generatePlane(width: 0.1, height: 0.1), materials: [material])
let anchor = AnchorEntity(.plane(.any, classification: .any, minimumBounds: .zero))
anchor.addChild(entity)
uiView.scene.addAnchor(anchor)
} catch {
print(error.localizedDescription)
}
}
}
class Coordinator: NSObject, ARSessionDelegate, ARSCNViewDelegate {
var arVC: ARViewContainer
init(_ arViewContainer: ARViewContainer) {
self.arVC = arViewContainer
}
func session(_ session: ARSession, didUpdate frame: ARFrame) {
}
func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
}
}
}
And in HomeView i have the following two variables:
#StateObject var arSession: ARSessionObservable = ARSessionObservable()
#State private var capturedImage: UIImage = UIImage()
The following button with action:
Button {
if let capturedFrame = arSession.session.currentFrame {
let ciimg = CIImage(cvPixelBuffer: capturedFrame.capturedImage)
if let cgImage = convertCIImageToCGImage(inputImage: ciimg) {
capturedImage = UIImage(cgImage: cgImage).rotate(radians: .pi / 2)
self.isShowingMail = true
}
}
} label: {
Image("ShareScreen")
.resizable()
.aspectRatio(contentMode:.fit)
.frame(width: 66, height: 66, alignment: .center)
}
Which takes the currentFrame from the session and opens a Mail sharing model with attachment:
.sheet(isPresented: $isShowingMail) {
MailComposeViewController(toRecipients: [], mailBody: nil, imageAttachment: capturedImage) {
self.isShowingMail = false
}
The mail sharing:
func makeUIViewController(context: UIViewControllerRepresentableContext<MailComposeViewController>) -> MFMailComposeViewController {
let mail = MFMailComposeViewController()
mail.mailComposeDelegate = context.coordinator
mail.setToRecipients(self.toRecipients)
if let body = mailBody {
mail.setMessageBody(body, isHTML: true)
}
if let image = imageAttachment {
if let imageData = image.pngData() {
mail.addAttachmentData(imageData, mimeType: "image/png", fileName: "image.png")
}
}
return mail
}
The problem is that on the preview there are present the Model Entities, photo below:
And when i press share, on the mail preview the model is missing from the frame:
I managed to make it work by moving arView: ARView! outside the ARViewContainer
var arView: ARView!
struct ARViewContainer: UIViewRepresentable {
func makeUIView(context: Context) -> ARView {
arView = ARView(frame: .zero)
let config = ARWorldTrackingConfiguration()
config.planeDetection = [.vertical, .horizontal]
config.environmentTexturing = .automatic
if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
config.sceneReconstruction = .mesh
}
arView.session.delegate = context.coordinator
arView.session.run(config)
return arView
}
}
And then calling the snapshot function to arView in the other View:
Button {
arView.snapshot(saveToHDR: false) { image in
let image = UIImage(data: (image?.pngData())!)
capturedImage = image!
self.isShowingMail = true
}

How can I record AVDepthData video and save in the gallery?

I am developing an application to record RGB-D sequences with the iPhone by using the DualRearCamera or the TrueDepthCamera. I can capture and visualize the RGB frame and depth frames and I developed a version where I can compress this data and save in the internal files of the iPhone. Nevertheless, my idea is to save both sequences (RGB and depth map sequences) in the gallery, but I am having problems to use AVAssetWritter and create a depth map video.
I am using the iPhone X, Xcode 10.2.1 and swift 5
import UIKit
import AVFoundation
import AssetsLibrary
var noMoreSpace = false
class ViewController: UIViewController{
#IBOutlet weak var previewView: UIImageView!
#IBOutlet weak var timeLabel: UILabel!
#IBOutlet weak var previewModeControl: UISegmentedControl!
let session = AVCaptureSession()
let dataOutputQueue = DispatchQueue(label: "video data queue")
let videoOutput = AVCaptureVideoDataOutput()
let movieOutput = AVCaptureMovieFileOutput()
let depthOutput = AVCaptureDepthDataOutput()
let depthCapture = DepthCapture()
var previewLayer = AVCaptureVideoPreviewLayer()
var inputDevice: AVCaptureDeviceInput!
let videoDeviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInDualCamera, .builtInTrueDepthCamera], mediaType: .video, position: .unspecified)
var Timestamp: String {
let currentDate = NSDate()
let dateFormatter = DateFormatter()
dateFormatter.dateFormat = "ddMM_HHmmss"
return "\(dateFormatter.string(from: currentDate as Date))"
}
var isRecording = false
var time = 0
var timer = Timer()
enum PreviewMode: Int {
case original
case depth
}
var previewMode = PreviewMode.original
var depthMap: CIImage?
var scale: CGFloat = 0.0
//let sessionQueue = DispatchQueue(label: "session queue")
override func viewDidLoad() {
super.viewDidLoad()
timeLabel.isHidden = true //TODO: Disable the rest of the UI
previewMode = PreviewMode(rawValue: previewModeControl.selectedSegmentIndex) ?? .original
configureCaptureSession()
session.startRunning()
}
func configureCaptureSession() {
session.beginConfiguration()
let camera = AVCaptureDevice.default(.builtInTrueDepthCamera, for: .video, position: .unspecified)!
do {
let cameraInput = try AVCaptureDeviceInput(device: camera)
if session.canAddInput(cameraInput){
session.sessionPreset = .vga640x480
session.addInput(cameraInput)
self.inputDevice = cameraInput
}
if session.canAddOutput(videoOutput){
videoOutput.setSampleBufferDelegate(self, queue: dataOutputQueue)
videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA]
session.addOutput(videoOutput)
let videoConnection = videoOutput.connection(with: .video)
videoConnection?.videoOrientation = .portrait
//previewLayer = AVCaptureVideoPreviewLayer(session: session)
//previewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
//previewLayer.connection?.videoOrientation = AVCaptureVideoOrientation.portrait
//previewView.layer.addSublayer(previewLayer)
//previewLayer.position = CGPoint(x: self.previewView.frame.width / 2, y: self.previewView.frame.height / 2)
//previewLayer.bounds = previewView.frame
}
//Add Depth output to the session
if session.canAddOutput(depthOutput){
session.addOutput(depthOutput)
depthOutput.setDelegate(self, callbackQueue: dataOutputQueue)
depthOutput.isFilteringEnabled = true
let depthConnection = depthOutput.connection(with: .depthData)
depthConnection?.videoOrientation = .portrait
}
/*if session.canAddOutput(movieOutput){
session.addOutput(movieOutput)
}*/
} catch {
print("Error")
}
let outputRect = CGRect(x: 0, y: 0, width: 1, height: 1)
let videoRect = videoOutput.outputRectConverted(fromMetadataOutputRect: outputRect)
let depthRect = depthOutput.outputRectConverted(fromMetadataOutputRect: outputRect)
// Calculate the scaling factor between videoRect and depthRect
scale = max(videoRect.width, videoRect.height) / max(depthRect.width, depthRect.height)
// Change the AVCaptureDevice configuration, so you need to lock it
do{
try camera.lockForConfiguration()
// Set the AVCaptureDevice‘s minimum frame duration (which is the inverse of the maximum frame rate) to be equal to the supported frame rate of the depth data
if let frameDuration = camera.activeDepthDataFormat?.videoSupportedFrameRateRanges.first?.minFrameDuration{
camera.activeVideoMinFrameDuration = frameDuration
}
// Unlock the configuration you locked
camera.unlockForConfiguration()
}catch{
fatalError(error.localizedDescription)
}
session.commitConfiguration()
}
#IBAction func startStopRecording(_ sender: Any) {
if isRecording{
stopRecording()
} else {
startRecording()
}
}
func startRecording(){
timeLabel.isHidden = false
timer = Timer.scheduledTimer(timeInterval: 1, target: self, selector: #selector(ViewController.timerAction), userInfo: nil, repeats: true)
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
let flagTime = Timestamp
let auxStr = flagTime+"_output.mp4"
let fileUrl = paths[0].appendingPathComponent(auxStr)
depthCapture.prepareForRecording(timeFlag: flagTime)
movieOutput.startRecording(to: fileUrl, recordingDelegate: self)
print(fileUrl.absoluteString)
print("Recording started")
self.isRecording = true
}
func stopRecording(){
timeLabel.isHidden = true
timer.invalidate()
time = 0
timeLabel.text = "0"
movieOutput.stopRecording()
print("Stopped recording!")
self.isRecording = false
do {
try depthCapture.finishRecording(success: { (url: URL) -> Void in
print(url.absoluteString)
})
} catch {
print("Error while finishing depth capture.")
}
}
#objc func timerAction() {
time += 1
timeLabel.text = String(time)
}
#IBAction func previeModeChanged(_ sender: UISegmentedControl) {
previewMode = PreviewMode(rawValue: previewModeControl.selectedSegmentIndex) ?? .original
}
#IBAction func switchCamera(_ sender: Any) {
let currentDevice = self.inputDevice.device
let currentPosition = currentDevice.position
let preferredPosition: AVCaptureDevice.Position
let preferredDeviceType: AVCaptureDevice.DeviceType
let devices = self.videoDeviceDiscoverySession.devices
var newVideoDevice: AVCaptureDevice? = nil
switch currentPosition {
case .unspecified, .front:
preferredPosition = .back
preferredDeviceType = .builtInDualCamera
case .back:
preferredPosition = .front
preferredDeviceType = .builtInTrueDepthCamera
#unknown default:
preferredPosition = .back
preferredDeviceType = .builtInDualCamera
}
// First, seek a device with both the preferred position and device type. Otherwise, seek a device with only the preferred position. ENTENDER MEJOR LQS CONDICIONES
if let device = devices.first(where: { $0.position == preferredPosition && $0.deviceType == preferredDeviceType }) {
newVideoDevice = device
} else if let device = devices.first(where: { $0.position == preferredPosition }) {
newVideoDevice = device
}
if let videoDevice = newVideoDevice {
do {
let cameraInput = try AVCaptureDeviceInput(device: videoDevice)
self.session.beginConfiguration()
self.session.removeInput(self.inputDevice)
if self.session.canAddInput(cameraInput) {
session.sessionPreset = .vga640x480
self.session.addInput(cameraInput)
self.inputDevice = cameraInput
}else {
self.session.addInput(self.inputDevice)
}
self.session.commitConfiguration()
} catch{
print("Error occurred while creating video device input: \(error)")
}
}
}
}
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate{
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)
let image = CIImage(cvPixelBuffer: pixelBuffer!)
let previewImage: CIImage
switch previewMode {
case .original:
previewImage = image
case .depth:
previewImage = depthMap ?? image
}
let displayImage = UIImage(ciImage: previewImage)
DispatchQueue.main.async {[weak self] in self?.previewView.image = displayImage}
}
}
extension ViewController: AVCaptureDepthDataOutputDelegate{
func depthDataOutput(_ output: AVCaptureDepthDataOutput, didOutput depthData: AVDepthData, timestamp: CMTime, connection: AVCaptureConnection) {
var convertedDepth: AVDepthData
// Ensure the depth data is the format you need: 32 bit FP disparity.???
if depthData.depthDataType != kCVPixelFormatType_DepthFloat16{
convertedDepth = depthData.converting(toDepthDataType: kCVPixelFormatType_DepthFloat32)
}else{
convertedDepth = depthData
}
// You save the depth data map from the AVDepthData object as a CVPixelBuffer
let pixelBuffer = convertedDepth.depthDataMap
//Using an extension, you then clamp the pixels in the pixel buffer to keep them between 0.0 and 1.0.
pixelBuffer.clamp()
// Convert the pixel buffer into a CIImage
let depthMap = CIImage(cvPixelBuffer: pixelBuffer)
// You store depthMap in a class variable for later use
DispatchQueue.main.async {
[weak self] in self?.depthMap = depthMap
}
}
}

Display a 3D object forever in ARkit

I am trying to recognise an object to displaying 3D model,
and it’s work when I put the camera on the object the 3D model displayed
my problem is 3d model displayed only for around 5 second then it disappear
so how can I display my 3d model forever?
This my code section:
class ViewController: UIViewController, UITextFieldDelegate
{
var proximityObserver: ProximityObserver! // Beacon declaration
#IBOutlet weak var sceneView: ARSCNView!
//#IBOutlet var sceneView: ARSCNView!
let fadeDuration: TimeInterval = 0.3
let rotateDuration: TimeInterval = 3
let waitDuration: TimeInterval = 0.5
lazy var fadeAndSpinAction: SCNAction = {
return .sequence([
.fadeIn(duration: fadeDuration),
.rotateBy(x: 0, y: 0, z: CGFloat.pi * 360 / 180, duration: rotateDuration),
.wait(duration: waitDuration),
.fadeOut(duration: fadeDuration)
])
}()
lazy var fadeAction: SCNAction = {
return .sequence([
.fadeOpacity(by: 0.8, duration: fadeDuration),
.wait(duration: waitDuration),
.fadeOut(duration: fadeDuration)
])
}()
lazy var bookNode: SCNNode = {
guard let scene = SCNScene(named: "art.scnassets/book.scn"),
let node = scene.rootNode.childNode(withName: "book", recursively: true) else { return SCNNode() }
let scaleFactor = 0.1
node.scale = SCNVector3(scaleFactor, scaleFactor, scaleFactor)
return node
}()
override func viewDidLoad() {
super.viewDidLoad()
let estimoteCloudCredentials = CloudCredentials(appID: "reem-badr-s-proximity-for--6o4", appToken: "8be2dff5dc16b9747b7fafe97ff53708")
proximityObserver = ProximityObserver(credentials: estimoteCloudCredentials, onError: { error in
print("ProximityObserver error: \(error)")
})
let zone = ProximityZone(tag: "reem-badr-s-proximity-for--6o4", range: ProximityRange.near)
zone.onEnter = { contexts in
print("enter")
self.sceneView.delegate = self as ARSCNViewDelegate
self.configureLighting()
}
zone.onExit =
{ contexts in
print("Exit")
}
proximityObserver.startObserving([zone])
}
func configureLighting() {
sceneView.autoenablesDefaultLighting = true
sceneView.automaticallyUpdatesLighting = true
}
override func viewWillAppear(_ animated: Bool) {
super.viewWillAppear(animated)
resetTrackingConfiguration()
}
override func viewWillDisappear(_ animated: Bool) {
super.viewWillDisappear(animated)
sceneView.session.pause()
}
#IBAction func resetButtonDidTouch(_ sender: UIBarButtonItem) {
resetTrackingConfiguration()
}
func resetTrackingConfiguration()
{
guard let referenceImages = ARReferenceImage.referenceImages(inGroupNamed: "AR Resources", bundle: nil) else { return }
let configuration = ARWorldTrackingConfiguration()
configuration.detectionImages = referenceImages
sceneView.session.run(configuration)
}
}
func renderer(_ renderer: SCNSceneRenderer, didAdd node: SCNNode, for anchor: ARAnchor)
{
DispatchQueue.main.async {
guard let imageAnchor = anchor as? ARImageAnchor,
let imageName = imageAnchor.referenceImage.name else { return }
let overlayNode = self.getNode(withImageName: imageName)
overlayNode.opacity = 0
overlayNode.position.y = 0.2
overlayNode.runAction(self.fadeAndSpinAction)
node.addChildNode(overlayNode)
}
func getPlaneNode(withReferenceImage image: ARReferenceImage) -> SCNNode {
let plane = SCNPlane(width: image.physicalSize.width,
height: image.physicalSize.height)
let node = SCNNode(geometry: plane)
return node
}
func getNode(withImageName name: String) -> SCNNode
{
var node = SCNNode()
switch name
{
case "Book":
node = bookNode
default:
break
}
return node
}
}
and so on...

Can I get 3d models from web servers on Swift?

I'm working on an application with Arkit. There are many 3D models and the size is big in my app. Can I get these models out of another server (outside sites)? I'm new on swift, I can't seem to find anything on loading a 3d model from a web server.
is it enough to change the model path there? Thank you
func loadModel() {
guard let virtualObjectScene = SCNScene(named: "\(modelName).\(fileExtension)", inDirectory: "Models.scnassets/\(modelName)") else {
return
}
let wrapperNode = SCNNode()
for child in virtualObjectScene.rootNode.childNodes {
let defaults = UserDefaults.standard
wrapperNode.addChildNode(child)
}
self.addChildNode(wrapperNode)
}
All code:
import UIKit
import SceneKit
import ARKit
class VirtualObject: SCNNode {
var modelName: String = ""
var fileExtension: String = ""
var thumbImage: UIImage!
var title: String = ""
var viewController: ViewController?
override init() {
super.init()
self.name = "Virtual object root node"
}
init(modelName: String, fileExtension: String, thumbImageFilename: String, title: String) {
super.init()
self.name = "Virtual object root node"
self.modelName = modelName
self.fileExtension = fileExtension
self.thumbImage = UIImage(named: thumbImageFilename)
self.title = title
}
required init?(coder aDecoder: NSCoder) {
fatalError("init(coder:) has not been implemented")
}
func loadModel() {
guard let virtualObjectScene = SCNScene(named: "\(modelName).\(fileExtension)", inDirectory: "Models.scnassets/\(modelName)") else {
return
}
let wrapperNode = SCNNode()
for child in virtualObjectScene.rootNode.childNodes {
let defaults = UserDefaults.standard
wrapperNode.addChildNode(child)
}
self.addChildNode(wrapperNode)
}
func unloadModel() {
self.removeFromParentNode()
for child in self.childNodes {
child.removeFromParentNode()
}
}
func translateBasedOnScreenPos(_ pos: CGPoint, instantly: Bool, infinitePlane: Bool) {
guard let controller = viewController else {
return
}
let result = controller.worldPositionFromScreenPosition(pos, objectPos: self.position, infinitePlane: infinitePlane)
controller.moveVirtualObjectToPosition(result.position, instantly, !result.hitAPlane)
}
}
extension VirtualObject {
static func isNodePartOfVirtualObject(_ node: SCNNode) -> Bool {
if node.name == "Virtual object root node" {
return true
}
if node.parent != nil {
return isNodePartOfVirtualObject(node.parent!)
}
return false
}
static let availableObjects: [VirtualObject] = [
Anatomy()
]
}
you can load an scn file from a webserver with ip addresses like this (i used a fake ip below)
let myURL = NSURL(string: “http://110.151.153.202:80/scnfiles/myfile.scn”)
let scene = try! SCNScene(url: myURL! as URL, options:nil)
Edit:
Here’s a simple Swift PlayGrounds which pulls a test cube scn file from my github repo. You just tap anywhere and the cube loads.
import ARKit
import SceneKit
import PlaygroundSupport
class ViewController: NSObject {
var sceneView: ARSCNView
init(sceneView: ARSCNView) {
self.sceneView = sceneView
super.init()
self.setupWorldTracking()
self.sceneView.addGestureRecognizer(UITapGestureRecognizer(target: self, action: #selector(ViewController.handleTap(_:))))
}
private func setupWorldTracking() {
if ARWorldTrackingConfiguration.isSupported {
let configuration = ARWorldTrackingConfiguration()
configuration.planeDetection = .horizontal
configuration.isLightEstimationEnabled = true
self.sceneView.session.run(configuration, options: [])
}
}
#objc func handleTap(_ gesture: UITapGestureRecognizer) {
let results = self.sceneView.hitTest(gesture.location(in: gesture.view), types: ARHitTestResult.ResultType.featurePoint)
guard let result: ARHitTestResult = results.first else {
return
}
// pulls cube.scn from github repo
let myURL = NSURL(string: "https://raw.githubusercontent.com/wave-electron/scnFile/master/cube.scn")
let scene = try! SCNScene(url: myURL! as URL, options: nil)
let node = scene.rootNode.childNode(withName: "SketchUp", recursively: true)
node?.scale = SCNVector3(0.01,0.01,0.01)
let position = SCNVector3Make(result.worldTransform.columns.3.x, result.worldTransform.columns.3.y, result.worldTransform.columns.3.z)
node?.position = position
self.sceneView.scene.rootNode.addChildNode(node!)
}
}
let sceneView = ARSCNView()
let viewController = ViewController(sceneView: sceneView)
sceneView.autoenablesDefaultLighting = true
PlaygroundPage.current.needsIndefiniteExecution = true
PlaygroundPage.current.liveView = viewController.sceneView

Objects Track using vision framework in iOS 11

I want to detect object and track that object using vision framework. I am successfully done with detect objects and little bit with tracking also but I don't get so much accuracy with tracking.
I want much more accuracy while converting frames as its frequently lost the accuracy while track the objects.
Please check the below code for detect and track the objects:
import UIKit
import AVFoundation
import Vision
class ViewController: UIViewController {
private lazy var captureSession: AVCaptureSession = {
let session = AVCaptureSession()
session.sessionPreset = AVCaptureSession.Preset.photo
guard let backCamera = AVCaptureDevice.default(for: .video),
let input = try? AVCaptureDeviceInput(device: backCamera) else
{
return session
}
session.addInput(input)
return session
}()
private lazy var cameraLayer: AVCaptureVideoPreviewLayer =
AVCaptureVideoPreviewLayer(session: self.captureSession)
private let handler = VNSequenceRequestHandler()
fileprivate var lastObservation: VNDetectedObjectObservation?
lazy var highlightView: UIView = {
let view = UIView()
view.layer.borderColor = UIColor.red.cgColor
view.layer.borderWidth = 4
view.backgroundColor = .clear
return view
}()
override func viewDidLoad() {
super.viewDidLoad()
view.layer.addSublayer(cameraLayer)
view.addSubview(highlightView)
let output = AVCaptureVideoDataOutput()
output.setSampleBufferDelegate(self, queue: DispatchQueue(label:
"queue"))
captureSession.addOutput(output)
captureSession.startRunning()
let tapGestureRecognizer = UITapGestureRecognizer(target: self,
action: #selector(tapAction))
view.addGestureRecognizer(tapGestureRecognizer)
}
override func viewDidLayoutSubviews() {
super.viewDidLayoutSubviews()
cameraLayer.frame = view.bounds
}
// MARK: - Actions
#objc private func tapAction(recognizer: UITapGestureRecognizer) {
highlightView.frame.size = CGSize(width: 120, height: 120)
highlightView.center = recognizer.location(in: view)
let originalRect = highlightView.frame
var convertedRect =
cameraLayer.metadataOutputRectConverted(fromLayerRect:
originalRect)
convertedRect.origin.y = 1 - convertedRect.origin.y
lastObservation = VNDetectedObjectObservation(boundingBox:
convertedRect)
}
fileprivate func handle(_ request: VNRequest, error: Error?) {
DispatchQueue.main.async {
guard let newObservation = request.results?.first as?
VNDetectedObjectObservation else {
return
}
self.lastObservation = newObservation
var transformedRect = newObservation.boundingBox
transformedRect.origin.y = 1 - transformedRect.origin.y
let convertedRect =
self.cameraLayer.layerRectConverted(fromMetadataOutputRect:
transformedRect)
self.highlightView.frame = convertedRect
}
}
}
extension ViewController:
AVCaptureVideoDataOutputSampleBufferDelegate {
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer:
CMSampleBuffer, from connection: AVCaptureConnection) {
guard let pixelBuffer =
CMSampleBufferGetImageBuffer(sampleBuffer),
let observation = lastObservation else {
return
}
let request = VNTrackObjectRequest(detectedObjectObservation:
observation) { [unowned self] request, error in
self.handle(request, error: error)
}
request.trackingLevel = .accurate
do {
try handler.perform([request], on: pixelBuffer)
}
catch {
print(error)
}
}
}
Any help will be appreciated!!
Thanks.
I am not so good at vision and core ml, but apparently your code looks fine. One thing you can do is check when vision does not get any tracking in the buffer, you have to mark its property isLastFrame true if tracking request confidence value falls to 0.
if !trackingRequest.isLastFrame {
if observation.confidence > 0.7 {
trackingRequest.inputObservation = observation
} else {
trackingRequest.isLastFrame = true
}
newTrackingRequests.append(trackingRequest)
}
This way its easy to find out whether vision tracking request lost tracking object or it just tracking the wrong object.