I want to use Core Image for processing a bunch of CGImage
objects and turning them into a QuickTime movie on macOS. The following code demonstrates what's needed, but the output contains a lot of blank (black) frames:
import AppKit
import AVFoundation
import CoreGraphics
import Foundation
import CoreVideo
import Metal
// Video output url.
let url: URL = try! FileManager.default.url(for: .downloadsDirectory, in: .userDomainMask, appropriateFor: nil, create: false).appendingPathComponent("av.mov")
try? FileManager.default.removeItem(at: url)
// Video frame size, total frame count, frame rate and frame image.
let frameSize: CGSize = CGSize(width: 2000, height: 1000)
let frameCount: Int = 100
let frameRate: Double = 1 / 30
let frameImage: CGImage
frameImage = NSImage(size: frameSize, flipped: false, drawingHandler: {
NSColor.red.setFill()
$0.fill()
return true
}).cgImage(forProposedRect: nil, context: nil, hints: nil)!
let pixelBufferAttributes: [CFString: Any]
let outputSettings: [String: Any]
pixelBufferAttributes = [
kCVPixelBufferPixelFormatTypeKey: Int(kCVPixelFormatType_32ARGB),
kCVPixelBufferWidthKey: Float(frameSize.width),
kCVPixelBufferHeightKey: Float(frameSize.height),
kCVPixelBufferMetalCompatibilityKey: true,
kCVPixelBufferCGImageCompatibilityKey: true,
kCVPixelBufferCGBitmapContextCompatibilityKey: true,
]
outputSettings = [
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoWidthKey: Int(frameSize.width),
AVVideoHeightKey: Int(frameSize.height),
]
let writer: AVAssetWriter = try! AVAssetWriter(outputURL: url, fileType: .mov)
let input: AVAssetWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: outputSettings)
let pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: input, sourcePixelBufferAttributes: pixelBufferAttributes as [String: Any])
input.expectsMediaDataInRealTime = true
precondition(writer.canAdd(input))
writer.add(input)
precondition(writer.startWriting())
writer.startSession(atSourceTime: CMTime.zero)
let colorSpace: CGColorSpace = CGColorSpace(name: CGColorSpace.sRGB) ?? CGColorSpaceCreateDeviceRGB()
let context = CIContext(mtlDevice: MTLCreateSystemDefaultDevice()!)
Swift.print("Starting the render…")
// Preferred scenario: using CoreImage to fill the buffer from the pixel buffer adapter. Shows that
// CIImage + AVAssetWriterInputPixelBufferAdaptor are not working together.
for frameNumber in 0 ..< frameCount {
var pixelBuffer: CVPixelBuffer?
guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
precondition(CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pixelBuffer) == kCVReturnSuccess)
precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
let ciImage = CIImage(cgImage: frameImage)
context.render(ciImage, to: pixelBuffer!)
// ?? This fails – the pixel buffer doesn't get filled. AT ALL! Why? How to make it work?
let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
precondition(bytes.contains(where: { $0 != 0 }))
while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
}
// Unpreferred scenario: using CoreImage to fill the manually created buffer. Proves that CIImage
// can fill buffer and working.
// for frameNumber in 0 ..< frameCount {
// var pixelBuffer: CVPixelBuffer?
// precondition(CVPixelBufferCreate(nil, frameImage.width, frameImage.height, kCVPixelFormatType_32ARGB, pixelBufferAttributes as CFDictionary, &pixelBuffer) == kCVReturnSuccess)
//
// precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
// defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
//
// let ciImage = CIImage(cgImage: frameImage)
// context.render(ciImage, to: pixelBuffer!)
//
// // ? This passes.
// let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
// precondition(bytes.contains(where: { $0 != 0 }))
//
// while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
// precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
// }
// Unpreferred scenario: using CoreGraphics to fill the buffer from the pixel buffer adapter. Shows that
// buffer from pixel buffer adapter can be filled and working.
// for frameNumber in 0 ..< frameCount {
// var pixelBuffer: CVPixelBuffer?
// guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
// precondition(CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pixelBuffer) == kCVReturnSuccess)
//
// precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
// defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
//
// guard let context: CGContext = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer!), width: frameImage.width, height: frameImage.height, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: colorSpace, bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue) else { preconditionFailure() }
// context.clear(CGRect(origin: .zero, size: frameSize))
// context.draw(frameImage, in: CGRect(origin: .zero, size: frameSize))
//
// // ? This passes.
// let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
// precondition(bytes.contains(where: { $0 != 0 }))
//
// while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
// precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
// }
let semaphore = DispatchSemaphore(value: 0)
input.markAsFinished()
writer.endSession(atSourceTime: CMTime(seconds: Double(frameCount) * frameRate, preferredTimescale: 600))
writer.finishWriting(completionHandler: { semaphore.signal() })
semaphore.wait()
Swift.print("Successfully finished rendering to (url.path)")
The following, however, works with CGContext
, but I need CIContext
in order to make use of GPU. The problem seems to be with pixel buffers provided by the AVAssetWriterInputPixelBufferAdaptor
's buffer pool. Rendering CIContext
into individually created buffers and appending them to the adapter works, but is highly inefficient. Rendering CIContext
into buffers provided by the adapter's pool results in no data being written into buffer at all, it literally contains all zeroes as if two are incompatible! However, rendering using CGImage
works, so as copying the data manually.
The main observation is that CIContext.render
appears to work asynchronously or something goes wrong between the buffer getting filled and data being written into the video stream. In other words there's no data in the buffer when it gets flushed. The following is kind of pointing in that direction:
- Removing buffer locking results in almost all frames being written, except for the first few, the above code actually produces a correct output, but with the actual data the behaviour is as described.
- Using a different codec, like ProRes422, results in almost all frames being written correctly, with just a few blanks – also the above code produces correct output, but larger and complex images result in skipped frames.
What's wrong with this code and what's the right way to do it?
P.S. Most iOS examples use pretty much the same implementation and seem to work perfectly fine. I found a hint that it might differ for macOS, but can't see any official documentation on this.
See Question&Answers more detail:
os