CIImage pixelBuffer always returns nil - Swift

I am trying to apply a filter effect to my WebRTC call, following this tutorial:
Here is my code to convert:
/// Receives a captured WebRTC frame, runs it through the background-replacement
/// processor and forwards the processed frame to `videoSource`.
///
/// - Parameters:
///   - capturer: The capturer that produced `frame`; passed through unchanged.
///   - frame: The captured frame. Only frames backed by an `RTCCVPixelBuffer`
///     are processed; any other buffer type is ignored.
func capturer(_ capturer: RTCVideoCapturer, didCapture frame: RTCVideoFrame) {
    // A conditional cast avoids a crash when the frame is not CVPixelBuffer-backed
    // (for example an I420 buffer).
    guard let sourceBuffer = frame.buffer as? RTCCVPixelBuffer else { return }
    let pixelBufferRef = sourceBuffer.pixelBuffer

    if #available(iOS 15.0, *) {
        if let output = GreetingProcessor.shared.processVideoFrame(
            foreground: pixelBufferRef,
            background: self.vbImage) {
            print("new output: \(output) => \(output.pixelBuffer) + \(self.buffer(from: output))")

            // NOTE: `CIImage.pixelBuffer` is only non-nil for an image that was
            // created directly from a CVPixelBuffer. The output of a filter chain
            // has no backing buffer, so it must first be rendered into one
            // (e.g. with `CIContext.render(_:to:)`) before it can be wrapped here.
            // NOTE(review): `self.buffer(from:)` presumably performs that render —
            // confirm its return type and use its result instead of `pixelBuffer`.
            guard let px = output.pixelBuffer else { return }

            let rtcPixelBuffer = RTCCVPixelBuffer(pixelBuffer: px)
            let i420buffer = rtcPixelBuffer.toI420()
            let newFrame = RTCVideoFrame(
                buffer: i420buffer,
                rotation: frame.rotation,
                timeStampNs: frame.timeStampNs)
            self.videoSource.capturer(capturer, didCapture: newFrame)
        }
    }
}
Then here is how I apply the effect:
/// Composites `foreground` over `background` through `mask`, after stretching
/// the mask and the background to the foreground's size.
///
/// - Parameters:
///   - background: Image shown where the mask is transparent/black.
///   - foreground: Image shown where the mask is opaque/white; its extent
///     defines the target size.
///   - mask: Mask image; interpreted through its red channel when
///     `isRedMask` is `true`.
///   - isRedMask: Selects `blendWithRedMask` instead of `blendWithMask`.
/// - Returns: The blended image, or `nil` when the foreground, mask or
///   background has an empty extent (no scale factor can be computed), when the
///   foreground extent is infinite, or when the filter produces no output.
func blendImages(
    background: CIImage,
    foreground: CIImage,
    mask: CIImage,
    isRedMask: Bool = false
) -> CIImage? {
    let targetExtent = foreground.extent
    guard !targetExtent.isEmpty, !targetExtent.isInfinite else { return nil }

    // Stretches `image` so its extent has the foreground's width and height.
    // Infinite images (e.g. solid colors) already cover the target and are
    // returned untouched; empty images cannot be scaled and yield nil instead
    // of a division by zero.
    func stretchedToForeground(_ image: CIImage) -> CIImage? {
        let extent = image.extent
        if extent.isInfinite { return image }
        guard !extent.isEmpty else { return nil }
        let scale = CGAffineTransform(
            scaleX: targetExtent.width / extent.width,
            y: targetExtent.height / extent.height)
        return image.transformed(by: scale)
    }

    guard let maskScaled = stretchedToForeground(mask),
          let backgroundScaled = stretchedToForeground(background) else {
        return nil
    }

    let blendFilter = isRedMask ? CIFilter.blendWithRedMask() : CIFilter.blendWithMask()
    blendFilter.inputImage = foreground
    blendFilter.backgroundImage = backgroundScaled
    blendFilter.maskImage = maskScaled
    return blendFilter.outputImage
}
The problem is that output.pixelBuffer is always nil, so I cannot create an RTCVideoFrame to pass back to the delegate.
Can someone help?


Get snapshot from AVCaptureSession containing Vision face detection elements

I use AVCaptureSession to set up a camera view and the Vision framework to detect a face and draw a rectangle on it.
Here is how I can do it
override func viewDidLoad() {
// Sets up the Vision requests for face tracking: a face-rectangle detection
// request whose completion handler creates a tracking request for each detected
// face, plus a fresh sequence request handler.
// NOTE(review): closing braces and early-exit statements appear to be missing
// from this excerpt, so the nesting implied by the comments below is an assumption.
fileprivate func prepareVisionRequest() {
//self.trackingRequests = []
// Local list intended to hold one tracking request per detected face.
var requests = [VNTrackObjectRequest]()
let faceDetectionRequest = VNDetectFaceRectanglesRequest(completionHandler: { (request, error) in
// Log any error reported for the detection request.
if error != nil {
print("FaceDetection error: \(String(describing: error)).")
// Proceed only when the request is a face-rectangle request that carries results.
guard let faceDetectionRequest = request as? VNDetectFaceRectanglesRequest,
let results = faceDetectionRequest.results else {
// The per-face tracking requests are created on the main queue.
DispatchQueue.main.async {
// Add the observations to the tracking list
for observation in results {
let faceTrackingRequest = VNTrackObjectRequest(detectedObjectObservation: observation)
faceTrackingRequest.trackingLevel = .fast
// NOTE(review): nothing visible here adds `faceTrackingRequest` to `requests`
// before it is published — confirm against the full source.
self.trackingRequests = requests
// Start with detection. Find face, then track it.
self.detectionRequests = [faceDetectionRequest]
self.sequenceRequestHandler = VNSequenceRequestHandler()
// MARK: Drawing Vision Observations
// Creates the overlay layer and the two shape layers (face rectangle and face
// landmarks), all sized to the capture-device resolution, and stores them in the
// corresponding properties.
// NOTE(review): closing braces, the guard's exit and the `.name` assignments look
// mangled in this excerpt (e.g. `CALayer() = "DetectionOverlay"`); confirm
// against the original source.
fileprivate func setupVisionDrawingLayers() {
let captureDeviceResolution = self.captureDeviceResolution
// Full capture frame, anchored at the origin.
let captureDeviceBounds = CGRect(x: 0,
y: 0,
width: captureDeviceResolution.width,
height: captureDeviceResolution.height)
let captureDeviceBoundsCenterPoint = CGPoint(x: captureDeviceBounds.midX,
y: captureDeviceBounds.midY)
// Anchor point (0.5, 0.5) makes each layer's `position` refer to its center.
let normalizedCenterPoint = CGPoint(x: 0.5, y: 0.5)
// A root layer is required; otherwise an error alert is presented.
guard let rootLayer = self.rootLayer else {
self.presentErrorAlert(message: "view was not property initialized")
// Overlay layer: capture-sized, clipped to its bounds, centered in the root layer.
let overlayLayer = CALayer() = "DetectionOverlay"
overlayLayer.masksToBounds = true
overlayLayer.anchorPoint = normalizedCenterPoint
overlayLayer.bounds = captureDeviceBounds
overlayLayer.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
// Face rectangle layer: capture-sized and centered on the capture bounds,
// filled with 90%-opaque white and given a soft shadow.
let faceRectangleShapeLayer = CAShapeLayer() = "RectangleOutlineLayer"
faceRectangleShapeLayer.bounds = captureDeviceBounds
faceRectangleShapeLayer.anchorPoint = normalizedCenterPoint
faceRectangleShapeLayer.position = captureDeviceBoundsCenterPoint
faceRectangleShapeLayer.fillColor = UIColor.white.withAlphaComponent(0.9).cgColor
// faceLandmarksShapeLayer.strokeColor = UIColor.white.withAlphaComponent(0.7).cgColor
faceRectangleShapeLayer.lineWidth = 5
faceRectangleShapeLayer.shadowOpacity = 0.7
faceRectangleShapeLayer.shadowRadius = 5
// Landmarks layer: same geometry, but with neither fill nor stroke color set.
let faceLandmarksShapeLayer = CAShapeLayer() = "FaceLandmarksLayer"
faceLandmarksShapeLayer.bounds = captureDeviceBounds
faceLandmarksShapeLayer.anchorPoint = normalizedCenterPoint
faceLandmarksShapeLayer.position = captureDeviceBoundsCenterPoint
faceLandmarksShapeLayer.fillColor = nil
faceLandmarksShapeLayer.strokeColor = nil
// Keep references so later code can reach the layers.
// NOTE(review): no `addSublayer` call is visible in this excerpt.
self.detectionOverlayLayer = overlayLayer
self.detectedFaceRectangleShapeLayer = faceRectangleShapeLayer
self.detectedFaceLandmarksShapeLayer = faceLandmarksShapeLayer
Now, I have tried three ways to take snapshots:
1- Using UIGraphicsImageRenderer: it shows only the rectangle on the face; the camera view is not visible - it's black
2- Taking the image from captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection): the image from the buffer shows only the camera view, no rectangle
3- Using AVCapturePhotoCaptureDelegate to capture a photo from the AVCaptureSession: the captured photo shows only the camera view, no rectangle
Could you please help me take a snapshot that contains both the camera view and the rectangle? Thanks

CGBitmapContext 2x as slow compared to CALayer's draw()

I have some code I can't change that expects to be able to draw at any time. It's the main() function in BackgroundThread below - pretend it can't be modified in any way. Running this will use 70-80% CPU.
If instead of running the thread I replicate what it is doing in View::draw() (i.e. draw 5000 white rectangles at random positions), this will use about 30% CPU.
Where's the difference coming from? Looking at Instruments, although the call stack is the same starting from CGContextFillRect, the View::draw() version only spends 16% of the time doing memset() whereas the threaded version spends 80% of the time.
The code below is the FAST version. Comment out the FAST lines and uncomment the SLOW lines to switch to the SLOW (threaded) version. Compile with swiftc test.swift -otest && ./test. I'm on macOS 10.13, integrated graphics, if that matters.
Is there anything I can do to make the threaded version as fast as the View::draw() version?
import Cocoa
// File-level state: the application object, a semaphore, and optional
// references to the main window, the bitmap context and the display link.
// NOTE(review): a `var` keyword introducing the three optionals below appears to
// be missing from this excerpt — confirm against the original source.
let NSApp = NSApplication.shared,
vwaitSem = DispatchSemaphore(value: 0)
mainWindow: NSWindow?,
screen: CGContext?,
link: CVDisplayLink?
// Layer-backed view that supplies its own plain CALayer and implements the
// CALayerDelegate `draw(_:in:)` callback (the "FAST" path of the question).
// NOTE(review): closing braces and the fill calls inside the drawing loop are not
// visible in this excerpt.
class View: NSView, CALayerDelegate {
// Timestamp of the previous draw, used for the (commented-out) frame-time log.
var lastTime: CFTimeInterval = 0
override var acceptsFirstResponder: Bool {return true}
// Decoding from a nib/storyboard is not supported.
required init(coder aDecoder: NSCoder) {fatalError("This class does not support NSCoding")}
// Use a plain CALayer as the backing layer.
override func makeBackingLayer() -> CALayer {return CALayer()}
override init(frame: CGRect) {
super.init(frame: frame)
self.wantsLayer = true
// Fixed 2x scale and an opaque black background.
self.layer?.contentsScale = 2.0
self.layer?.backgroundColor = CGColor(red:0, green:0, blue:0, alpha: 1)
self.layerContentsRedrawPolicy = NSView.LayerContentsRedrawPolicy.onSetNeedsDisplay // FAST
// Layer drawing callback: builds 6x6 rectangles at random positions.
func draw(_ layer: CALayer, in ctx: CGContext) {
// Milliseconds since the previous draw, rounded; only used by the log below.
let now = CACurrentMediaTime(), timePassed = ((now-lastTime)*1000).rounded()
// NSLog("\(timePassed)")
lastTime = now
// Note: the closed range 0...5000 runs 5001 iterations.
for _ in 0...5000 {
// Random origin with x in 1...640 and y in 1...480.
let rect = CGRect(x: CGFloat(arc4random_uniform(640)+1), y: CGFloat(arc4random_uniform(480)+1), width:6, height:6)
// CVDisplayLink output callback. Schedules a redraw of the main window's content
// view on the main queue and reports success.
// The timestamp, flag and context parameters are not used in the visible body.
// NOTE(review): the closing braces of the async block and of the function are
// missing from this excerpt.
func displayLinkOutputCallback(_ displayLink: CVDisplayLink, _ nowPtr: UnsafePointer<CVTimeStamp>,
_ outputTimePtr: UnsafePointer<CVTimeStamp>, _ flagsIn: CVOptionFlags, _ flagsOut: UnsafeMutablePointer<CVOptionFlags>,
_ displayLinkContext: UnsafeMutableRawPointer?) -> CVReturn {
DispatchQueue.main.async {
// SLOW variant: snapshot the bitmap context into a CGImage and set it as the layer contents.
// mainWindow!.contentView!.layer!.contents = screen!.makeImage() // SLOW
// FAST variant: ask the content view to display itself.
mainWindow!.contentView!.display() // FAST
return kCVReturnSuccess
// Thread that repeatedly clears the global `screen` bitmap context and fills it
// with rectangles at random positions (the "SLOW" path of the question).
// NOTE(review): closing braces are missing from this excerpt.
class BackgroundThread: Thread {
// Timestamp of the previous iteration, used for the (commented-out) frame-time log.
var lastTime: CFTimeInterval = 0
override func main() {
// Loops without any visible exit or pacing.
while true {
let now = CACurrentMediaTime(), timePassed = ((now-lastTime)*1000).rounded()
// NSLog("\(timePassed)")
lastTime = now
// Clear a 1280x960 area (640x480 doubled).
screen?.clear(CGRect(x:0, y:0, width:640*2, height:480*2))
// Note: the closed range 0...5000 runs 5001 iterations.
for _ in 0...5000 {
// 12x12 rectangle with x in 1...1280 and y in 1...960.
screen?.fill(CGRect(x: CGFloat(arc4random_uniform(640*2)+1), y: CGFloat(arc4random_uniform(480*2)+1), width: 6*2, height: 6*2))
// Script entry point: creates the menu items, the window, the display color
// space, a 2x-sized bitmap context and the background thread, then wires up the
// display link.
// NOTE(review): no call that starts the display link or runs the application is
// visible in this excerpt.
let width = 640, height = 480,
appMenuItem = NSMenuItem(),
quitMenuItem = NSMenuItem(title:"Quit",
action:#selector(NSApplication.terminate), keyEquivalent:"q"),
window = NSWindow(contentRect:NSMakeRect(0,0, CGFloat(width), CGFloat(height)),
styleMask:[.closable,.titled], backing:.buffered, defer:false),
// Color space built from the ColorSync profile for display ID 0.
colorProfile = ColorSyncProfileCreateWithDisplayID(0),
colorSpace = CGColorSpace(platformColorSpaceRef: colorProfile!.toOpaque()),
// Bitmap context at twice the window size; `data: nil` and `bytesPerRow: 0`
// leave allocation and row stride to Core Graphics.
screen_ = CGContext(data: nil, width: Int(width)*2, height:Int(height)*2, bitsPerComponent:8, bytesPerRow: 0,
space: colorSpace!, bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue),
backgroundThread = BackgroundThread()
NSApp.mainMenu = NSMenu()
appMenuItem.submenu = NSMenu()
window.contentView = View()
// Publish the window and the context through the file-level variables.
mainWindow = window
screen = screen_
// Create a display link for the main display and register the output callback,
// passing the window as an unretained opaque context pointer.
CVDisplayLinkCreateWithCGDisplay(CGMainDisplayID(), &link)
CVDisplayLinkSetOutputCallback(link!, displayLinkOutputCallback, UnsafeMutableRawPointer(Unmanaged.passUnretained(window).toOpaque()))
// backgroundThread.start() // SLOW
I misread the note in the documentation for makeImage() and thought it would not copy the data unless it really had to. Well, Instruments shows it does copy the data. Every single frame.
So I switched to Metal and now I can draw from the background thread with the same performance/CPU usage as with CGContext alone, with no copies as far as I can tell.
Here's some working code:
import Cocoa
import MetalKit
// MTKView that exposes a page-aligned memory block both as a CGContext (for CPU
// drawing) and as a Metal buffer/texture, and copies that texture into the
// drawable on every draw.
// NOTE(review): closing braces are missing from this excerpt, and no
// `endEncoding()` on the blit encoder, `present` or `commit()` call is visible — confirm against
// the original source.
class View: MTKView {
// CPU-side drawing context backed by the shared allocation.
var screen: CGContext?
var commandQueue: MTLCommandQueue?
// Metal buffer wrapping the same allocation without copying.
var buffer: MTLBuffer?
// Texture created from `buffer`.
var texture: MTLTexture?
// Signalled from the command buffer's completion handler.
var vwaitSem = DispatchSemaphore(value: 0)
var backgroundThread: Thread?
var allocationSize = 0
// Rounds `size` up to the next multiple of `align`; the bit trick is only
// correct when `align` is a power of two.
func alignUp(size: Int, align: Int) -> Int {return (size+(align-1)) & ~(align-1)}
override var acceptsFirstResponder: Bool {return true}
// Decoding from a nib/storyboard is not supported.
required init(coder aDecoder: NSCoder) {fatalError("This class does not support NSCoding")}
// Starts with a zero frame and the system default Metal device.
init() {super.init(frame: CGRect(x:0, y:0, width:0, height: 0), device: MTLCreateSystemDefaultDevice())}
// Allocates the shared memory and builds the context, buffer, texture, command
// queue and background thread.
override func viewDidMoveToWindow() {
layer?.contentsScale = NSScreen.main!.backingScaleFactor
let metalLayer = layer as! CAMetalLayer
// Row stride: width in pixels * 4 bytes, rounded up to the device's linear
// texture alignment for the layer's pixel format.
let pixelRowAlignment = metalLayer.device!.minimumLinearTextureAlignment(for: metalLayer.pixelFormat)
let bytesPerRow = alignUp(size: Int(layer!.frame.width)*Int(layer!.contentsScale)*4, align: pixelRowAlignment)
let pagesize = Int(getpagesize())
var data: UnsafeMutableRawPointer? = nil
// Total size rounded up to whole pages, allocated page-aligned.
allocationSize = alignUp(size: bytesPerRow*Int(layer!.frame.height)*Int(layer!.contentsScale), align: pagesize)
// NOTE(review): the return value of posix_memalign is not checked.
posix_memalign(&data, pagesize, allocationSize)
let colorProfile = ColorSyncProfileCreateWithDisplayID(0),
colorSpace = CGColorSpace(platformColorSpaceRef: colorProfile!.toOpaque()),
// Bitmap context drawing directly into `data` with the aligned row stride.
screen_ = CGContext(data: data,
width: Int(layer!.frame.width)*Int(layer!.contentsScale),
height: Int(layer!.frame.height)*Int(layer!.contentsScale),
bitsPerComponent:8, bytesPerRow: bytesPerRow,
space: colorSpace!, bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)!,
// No-copy Metal buffer over the same memory.
// NOTE(review): the deallocator ignores its `pointer` argument and frees
// `self.screen!.data!` instead, which force-unwraps and captures `self`;
// `free(pointer)` looks like the intended call — confirm.
buffer_ = metalLayer.device!.makeBuffer(bytesNoCopy: data!, length: allocationSize, options: .storageModeManaged,
deallocator: { pointer, length in free(self.screen!.data!) })!,
textureDescriptor = MTLTextureDescriptor()
// Texture matches the layer's pixel format and the context's pixel size.
textureDescriptor.pixelFormat = metalLayer.pixelFormat
textureDescriptor.width = screen_.width
textureDescriptor.height = screen_.height
textureDescriptor.storageMode = buffer_.storageMode
// Same value as `.shaderRead`.
textureDescriptor.usage = MTLTextureUsage(rawValue: MTLTextureUsage.shaderRead.rawValue)
texture = buffer_.makeTexture(descriptor: textureDescriptor, offset: 0, bytesPerRow: screen_.bytesPerRow)
commandQueue = device?.makeCommandQueue()
screen = screen_
buffer = buffer_
// NOTE(review): the thread is created here but no `start()` call is visible.
backgroundThread = BackgroundThread(screen: screen!, vwaitSem: vwaitSem)
// Copies the CPU-drawn pixels into the current drawable.
override func draw(_ dirtyRect: NSRect) {
if let drawable = currentDrawable {
// Upload the context's bytes into the texture.
// NOTE(review): the texture is created from the buffer that wraps this same
// memory, so this may be copying the allocation onto itself — confirm.
texture!.replace(region: MTLRegionMake2D(0,0, screen!.width, screen!.height),
mipmapLevel:0, slice:0, withBytes: screen!.data!, bytesPerRow: screen!.bytesPerRow, bytesPerImage: 0)
let commandBuffer = commandQueue!.makeCommandBuffer()!
// Blit the whole texture into the drawable's texture at the origin.
let blitPass = commandBuffer.makeBlitCommandEncoder()!
blitPass.copy(from: texture!, sourceSlice:0, sourceLevel:0, sourceOrigin: MTLOrigin(x:0,y:0,z:0),
sourceSize: MTLSize(width:screen!.width, height:screen!.height, depth: 1),
to: drawable.texture, destinationSlice:0, destinationLevel:0, destinationOrigin: MTLOrigin(x:0,y:0,z:0))
// Render pass that loads the existing drawable contents and encodes no draw calls.
if let renderPass = currentRenderPassDescriptor {
renderPass.colorAttachments[0].texture = drawable.texture
renderPass.colorAttachments[0].loadAction = .load
commandBuffer.makeRenderCommandEncoder(descriptor: renderPass)!.endEncoding()
// Signal the semaphore once the command buffer has completed.
commandBuffer.addCompletedHandler {cb in self.vwaitSem.signal()}
// Thread that draws into the CGContext it is given. Each iteration clears the
// whole context and builds 6x6 rectangles at random positions.
// NOTE(review): closing braces, the fill call for `rect`, any `super.init()` and
// any wait on `vwaitSem` are not visible in this excerpt.
class BackgroundThread: Thread {
// Drawing target supplied by the view.
var screen: CGContext
// Semaphore supplied by the view; not used in the visible body.
var vwaitSem: DispatchSemaphore
// Only referenced by the commented-out animation code below.
var x = 0
init(screen:CGContext, vwaitSem:DispatchSemaphore) {
self.screen = screen
self.vwaitSem = vwaitSem
override func main() {
// Loops without any visible exit.
while true {
// Alternative test pattern (disabled): one white 100x100 square moving right.
// screen.clear(CGRect(x:0,y:0, width:screen.width, height:screen.height))
// screen.setFillColor(CGColor.white)
// screen.fill(CGRect(x:x, y:0, width:100, height:100))
// x += 1
screen.clear(CGRect(x:0,y:0, width:screen.width, height:screen.height))
let screenWidth = UInt32(screen.width), screenHeight = UInt32(screen.height)
// Note: the closed range 0...5000 runs 5001 iterations.
for _ in 0...5000 {
// Random origin with x in 0...screenWidth and y in 0...screenHeight.
let rect = CGRect(x: CGFloat(arc4random_uniform(screenWidth+1)),
y: CGFloat(arc4random_uniform(screenHeight+1)), width:6, height:6)
// Script entry point for the Metal version: creates the menu items and a
// 640x480 titled, closable window, then installs the Metal-backed View as its
// content view.
// NOTE(review): no call that shows the window or runs the application is visible
// in this excerpt.
let width = 640, height = 480,
appMenuItem = NSMenuItem(),
quitMenuItem = NSMenuItem(title:"Quit",
action:#selector(NSApplication.terminate), keyEquivalent:"q"),
window = NSWindow(contentRect:NSMakeRect(0,0, CGFloat(width), CGFloat(height)),
styleMask:[.closable,.titled], backing:.buffered, defer:false)
NSApp.mainMenu = NSMenu()
appMenuItem.submenu = NSMenu()
window.contentView = View()

Vision – Face recognition performed but correct coordinates cannot be obtained

Swift 5, Xcode 11, iOS 13.0.
In the code below, we detect the face in the image 'test', find the nose landmarks, and draw each point's index 'num' at its coordinates.
However, the nose points are not drawn at the correct positions.
I'm in trouble because I don't know the solution. I would be grateful if you could tell me.
import Vision
import Foundation
import SwiftUI
/// Shows the (optionally annotated) image together with a button that runs the
/// face-landmark detection and displays its result.
struct ContentView: View {
    // Property wrappers are written with `@`; the `#` prefix does not compile.
    @ObservedObject var faceGet = test()
    @State var uiimage: UIImage? = nil

    var body: some View {
        // NOTE(review): the enclosing container is not visible in the original
        // excerpt; a VStack is assumed because the body holds two sibling views.
        VStack {
            // The image only appears after the first detection run.
            if uiimage != nil {
                Image(uiImage: uiimage!).resizable().scaledToFit()
            }
            Button(action: {
                self.uiimage = self.faceGet.faceCheck()
            }) {
                Text("Tap image to see result")
            }
        }
    }
}
/// Detects face landmarks in a bundled image and labels every nose landmark
/// point with its index.
class test: ObservableObject {
    private var originalImage = UIImage(named: "test3")

    /// Runs face-landmark detection on `originalImage` and returns a copy with
    /// the index of each nose landmark point drawn at that point.
    ///
    /// - Returns: The annotated image; the unmodified image when no nose
    ///   landmarks are found or the request fails; `nil` when the image could
    ///   not be loaded.
    func faceCheck() -> UIImage? {
        guard let sourceImage = originalImage else { return nil }
        var drawnImage = sourceImage

        let request = VNDetectFaceLandmarksRequest { request, _ in
            // A conditional cast avoids a crash when the request has no results.
            guard let faces = request.results as? [VNFaceObservation] else { return }
            for face in faces {
                guard let nose = face.landmarks?.nose else { continue }
                // Convert once per face instead of once per point; iterating the
                // array is also safe when the region has zero points.
                let points = nose.pointsInImage(imageSize: sourceImage.size)
                for (index, point) in points.enumerated() {
                    drawnImage = self.drawText(image: drawnImage, point: point, num: index)
                }
            }
        }

        if let cgImage = sourceImage.cgImage {
            // NOTE(review): no orientation is passed to Vision; images whose
            // `imageOrientation` is not `.up` may need it — confirm with real input.
            let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
            do {
                try handler.perform([request])
            } catch {
                print("Face landmark detection failed: \(error)")
            }
        }
        return drawnImage
    }

    /// Returns a copy of `image` with `num` drawn as text at `point`.
    ///
    /// - Parameters:
    ///   - image: Image to draw on.
    ///   - point: Position in Vision image coordinates (origin at the bottom-left,
    ///     as returned by `pointsInImage(imageSize:)`).
    ///   - num: Number to draw.
    /// - Returns: The annotated image, or `image` itself when it has no area or
    ///   the drawing context yields no image.
    func drawText(image: UIImage, point: CGPoint, num: Int) -> UIImage {
        guard image.size.width > 0, image.size.height > 0 else { return image }

        let text = num.description
        let fontSize = image.size.height / image.size.width * 10
        let font = UIFont.boldSystemFont(ofSize: fontSize)
        let textWidth = CGFloat(round(text.widthOfString(usingFont: font)))
        let textHeight = CGFloat(round(text.heightOfString(usingFont: font)))

        // A bitmap context must be current before drawing; without it
        // UIGraphicsGetImageFromCurrentImageContext() returns nil.
        UIGraphicsBeginImageContextWithOptions(image.size, false, image.scale)
        defer { UIGraphicsEndImageContext() }

        let imageRect = CGRect(origin: .zero, size: image.size)
        image.draw(in: imageRect)

        // Vision's origin is bottom-left while UIKit's is top-left, so only the
        // y axis is flipped. Mirroring x as well put the labels on the wrong side.
        let rePoint = CGPoint(x: CGFloat(round(point.x)),
                              y: imageRect.maxY - CGFloat(round(point.y)))
        let textRect = CGRect(origin: rePoint, size: CGSize(width: textWidth, height: textHeight))

        let textStyle = NSMutableParagraphStyle()
        let textFontAttributes: [NSAttributedString.Key: Any] = [
            NSAttributedString.Key.font: font,
            NSAttributedString.Key.paragraphStyle: textStyle
        ]
        text.draw(in: textRect, withAttributes: textFontAttributes)

        return UIGraphicsGetImageFromCurrentImageContext() ?? image
    }
}
extension String {
    /// Width needed to render the string with `font`.
    ///
    /// - Parameter font: Font used for measuring.
    /// - Returns: The width of the string's bounding size.
    public func widthOfString(usingFont font: UIFont) -> CGFloat {
        return measuredSize(usingFont: font).width
    }

    /// Height needed to render the string with `font`.
    ///
    /// - Parameter font: Font used for measuring.
    /// - Returns: The height of the string's bounding size.
    public func heightOfString(usingFont font: UIFont) -> CGFloat {
        return measuredSize(usingFont: font).height
    }

    /// Shared measurement used by both public accessors.
    private func measuredSize(usingFont font: UIFont) -> CGSize {
        return self.size(withAttributes: [NSAttributedString.Key.font: font])
    }
}

Swift: How to take screenshot of hidden window of external application

I have tried the following code:
windowNumber is CGWindowID
/// Captures the window identified by `windowNumber` as an `NSImage`.
///
/// - Parameter imgOption: Options passed to `CGWindowListCreateImage`.
/// - Returns: The captured image sized to `frame`; a 1x1 image when no window
///   number is available; an empty image when the capture fails.
func screenshot(imgOption: CGWindowImageOption = .bestResolution) -> NSImage {
    guard let winNumber = self.windowNumber else { return NSImage(size: NSSize(width: 1, height: 1)) }

    // `CGRect.null` asks for the smallest rectangle that encloses the window.
    // The previous code built this rect from `FP_INFINITE`, which is a
    // floating-point *classification* constant rather than infinity, so a
    // zero-sized rect at a small finite origin was passed instead.
    // NOTE(review): windows that are minimized or otherwise not rendered may
    // still yield nil — confirm against the target application.
    let cgImage = CGWindowListCreateImage(.null, .optionIncludingWindow, CGWindowID(winNumber), imgOption)

    guard let cgImageUnwrapped = cgImage else { return NSImage() }
    return NSImage(cgImage: cgImageUnwrapped, size: self.frame.value.size)
}
but with the code above I get a nil result when the window is hidden.

How Do I Process an Image File to fit buffer dimensions for Vision Framework on MacOS?

I'm trying to make something simple to test Vision Framework on MacOS.
I tried to modify code from this tutorial to use a single image from screenshot instead of camera feed.
However, I get this error:
Error Code=3 "Failed to create image for processing due to invalid requested buffer dimensions"
Is it because screenshot image doesn't fit certain specification? Do I need to preprocess the file?
If so, how can I process it in order to fit the dimensions?
My testing code is below.
import Cocoa
import Vision
class ViewController: NSViewController {
var requests = [VNRequest]()
// Builds a text-rectangle detection request with character boxes enabled and
// runs it against the file ~/Screenshot.png.
// NOTE(review): the body of the `catch` clause and the closing braces are
// missing from this excerpt.
func start() {
// Results are delivered to `detectTextHandler`.
let textRequest = VNDetectTextRectanglesRequest(completionHandler: self.detectTextHandler)
textRequest.reportCharacterBoxes = true
self.requests = [textRequest]
// Expand "~" to the user's home directory before building the file URL.
let url = URL(fileURLWithPath:NSString(string:"~/Screenshot.png").expandingTildeInPath)
let imageRequestHandler = VNImageRequestHandler(url:url)
do {
try imageRequestHandler.perform(self.requests)
} catch {
// Completion handler for the text-rectangle request: on the main queue, reports
// every detected word and each of its character boxes.
// NOTE(review): this excerpt is mangled — the guards have no exit statements,
// closing braces are missing, and the `let result =` line has lost the expression
// that `{$0 as? VNTextObservation})` was applied to (presumably a `map` over
// `observations` — confirm against the original source).
func detectTextHandler(request: VNRequest, error: Error?) {
// Without results there is nothing to report.
guard let observations = request.results else {
print("no result")
let result ={$0 as? VNTextObservation})
DispatchQueue.main.async() {
for region in result {
// Entries that failed the cast above are nil.
guard let rg = region else {
self.highlightWord(box: rg)
if let boxes = region?.characterBoxes {
for characterBox in boxes {
self.highlightLetters(box: characterBox)
/// Prints the bounding frame of a detected word, computed from the corners of
/// its character boxes.
///
/// The frame uses the same coordinate space as the observation's corner
/// points, with the y origin flipped (`1 - top`).
///
/// - Parameter box: Text observation whose `characterBoxes` are measured.
///   Nothing is printed when it has no character boxes.
func highlightWord(box: VNTextObservation) {
    guard let boxes = box.characterBoxes, let first = boxes.first else {
        return
    }

    // Seed the extremes from the first box rather than from magic sentinels,
    // then widen them with the remaining boxes.
    var left = first.bottomLeft.x
    var right = first.bottomRight.x
    var bottom = first.bottomRight.y
    var top = first.topRight.y

    for char in boxes.dropFirst() {
        left = min(left, char.bottomLeft.x)
        right = max(right, char.bottomRight.x)
        bottom = min(bottom, char.bottomRight.y)
        top = max(top, char.topRight.y)
    }

    let frame = CGRect(x: left, y: 1 - top, width: right - left, height: top - bottom)
    print("Word: \(frame)")
}
// Prints the frame of a single character box, computed from its corner points.
// NOTE(review): the closing brace is missing from this excerpt.
func highlightLetters(box: VNRectangleObservation) {
// Origin is the top-left corner with the y axis flipped (1 - y).
let xCord = box.topLeft.x
let yCord = (1 - box.topLeft.y)
// Width spans bottom-left x to top-right x; height spans bottom-left y to top-left y.
let width = (box.topRight.x - box.bottomLeft.x)
let height = (box.topLeft.y - box.bottomLeft.y)
let frame = CGRect(x: xCord, y: yCord, width: width, height: height)
print("Letter: \(frame)")
override func viewDidLoad() {
// Do any additional setup after loading the view.
override var representedObject: Any? {
didSet {
// Update the view, if already loaded.