Last active
November 3, 2024 13:48
-
-
Save samsonjs/cdcca883bad44e5ee2da5a7392ae92aa to your computer and use it in GitHub Desktop.
See https://github.com/samsonjs/SJSAssetExportSession — Exporting videos with AVFoundation in the strict concurrency world of Swift 6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Created by Sami Samhuri on 2024-06-26. | |
// | |
import Foundation | |
/// A lock-guarded box that makes a mutable value of any type safe to share across
/// concurrency domains. Marked `@unchecked Sendable` because thread safety is provided
/// manually by the `NSLock`, not by the compiler.
final class SendableWrapper<T>: @unchecked Sendable {
    private let lock = NSLock()
    private var storage: T

    /// Thread-safe access to the wrapped value. Each get and set holds the lock for the
    /// duration of the access only; read-modify-write sequences are NOT atomic as a whole.
    var value: T {
        get {
            lock.lock()
            defer { lock.unlock() }
            return storage
        }
        set {
            lock.lock()
            defer { lock.unlock() }
            storage = newValue
        }
    }

    /// Wraps `value` for shared mutable access.
    init(_ value: T) {
        storage = value
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Created by Sami Samhuri on 2024-06-26. | |
// Based on SDAVAssetExportSession. | |
// | |
import Foundation | |
import AVFoundation | |
/// An export session that re-encodes an `AVAsset` into a new file with caller-supplied
/// audio/video output settings, in the spirit of `AVAssetExportSession`.
///
/// Configure the session via its properties, then call `export(to:as:progress:)` exactly
/// once: the asset and configuration are transferred out of the session by the first
/// export, and a second call throws `CancellationError`.
final class VFAExportSession {
    /// Enables video composition and parameters for the session.
    var videoComposition: AVVideoComposition? {
        get { sendableBag.videoComposition }
        set { sendableBag.videoComposition = newValue }
    }

    /// Enables audio mixing and parameters for the session.
    var audioMix: AVAudioMix? {
        get { sendableBag.audioMix }
        set { sendableBag.audioMix = newValue }
    }

    /// Audio output configuration dictionary, using keys defined in `<AVFoundation/AVAudioSettings.h>`
    var audioOutputConfiguration: [String: Any] {
        get { sendableBag.audioOutputConfiguration }
        set { sendableBag.audioOutputConfiguration = newValue }
    }

    /// Video output configuration dictionary, using keys defined in `<AVFoundation/AVVideoSettings.h>`
    var videoOutputConfiguration: [String: Any] {
        get { sendableBag.videoOutputConfiguration }
        set { sendableBag.videoOutputConfiguration = newValue }
    }

    /// Time range or limit of an export from `CMTime.zero` to `CMTime.positiveInfinity`
    var timeRange = CMTimeRange(start: .zero, end: .positiveInfinity)

    /// Indicates if an export should be optimized for network use.
    var optimizeForNetworkUse: Bool = false

    /// Thread-safe home for the non-Sendable asset and configuration.
    private let sendableBag: SendableBag

    /// Initializes a session with an asset to export.
    ///
    /// - Parameter asset: The asset to export.
    init(asset: sending AVAsset) {
        self.sendableBag = SendableBag(asset: asset)
    }

    /// Exports the asset to `outputURL` as `fileType`.
    ///
    /// - Parameters:
    ///   - outputURL: Destination file URL. Any partial output file is deleted on failure.
    ///   - fileType: Container type for the output file.
    ///   - progress: Optional main-actor callback receiving progress values as encoding
    ///     advances. Defaults to `nil` (progress is ignored), so existing callers of
    ///     `export(to:as:)` are unaffected.
    /// - Throws: `CancellationError` when the session's configuration was already consumed
    ///   by a previous export, or any error raised while loading the asset or encoding.
    func export(
        to outputURL: URL,
        as fileType: AVFileType,
        progress: (@MainActor @Sendable (Float) -> Void)? = nil
    ) async throws {
        let (asset, audioMix, audioOutputConfiguration, videoComposition, videoOutputConfiguration) = try sendableBag.consume()
        let duration = try await asset.load(.duration)
        let videoTracks = try await asset.sendTracks(withMediaType: .video)
        let audioTracks = try await asset.sendTracks(withMediaType: .audio)
        let encoder = try await VFASampleEncoder(
            asset: asset,
            assetDuration: duration,
            audioTracks: audioTracks,
            audioMix: audioMix,
            audioOutputConfiguration: audioOutputConfiguration,
            videoTracks: videoTracks,
            videoComposition: videoComposition,
            videoOutputConfiguration: videoOutputConfiguration,
            outputURL: outputURL,
            fileType: fileType,
            // Fix: forward the session's configuration. Previously these were never passed,
            // so the encoder silently used its own defaults — including
            // optimizeForNetworkUse = true, the opposite of this class's default.
            timeRange: timeRange,
            optimizeForNetworkUse: optimizeForNetworkUse
        ) { value in
            progress?(value)
        }
        do {
            try await encoder.encode()
        } catch {
            // Don't leave a partial/corrupt output file behind on failure.
            try? FileManager.default.removeItem(at: outputURL)
            throw error
        }
    }

    /// Lock-guarded storage for the asset and configuration so the session can be touched
    /// from any context. `@unchecked Sendable` because safety is provided by the `NSLock`.
    /// `consume()` transfers everything out exactly once.
    private class SendableBag: @unchecked Sendable {
        private let lock = NSLock()
        private var unsafeAsset: AVAsset?

        private var unsafeAudioMix: AVAudioMix?
        var audioMix: AVAudioMix? {
            get { lock.withLock { unsafeAudioMix } }
            set { lock.withLock { unsafeAudioMix = newValue } }
        }

        private var unsafeAudioOutputConfiguration: [String: Any] = [:]
        var audioOutputConfiguration: [String: Any] {
            get { lock.withLock { unsafeAudioOutputConfiguration } }
            set { lock.withLock { unsafeAudioOutputConfiguration = newValue } }
        }

        private var unsafeVideoComposition: AVVideoComposition?
        var videoComposition: AVVideoComposition? {
            get { lock.withLock { unsafeVideoComposition } }
            set { lock.withLock { unsafeVideoComposition = newValue } }
        }

        private var unsafeVideoOutputConfiguration: [String: Any] = [:]
        var videoOutputConfiguration: [String: Any] {
            get { lock.withLock { unsafeVideoOutputConfiguration } }
            set { lock.withLock { unsafeVideoOutputConfiguration = newValue } }
        }

        init(asset: sending AVAsset) {
            self.unsafeAsset = asset
        }

        /// Atomically hands the asset and configuration to the caller and clears all
        /// stored state, so the non-Sendable values have a single owner from here on.
        ///
        /// - Throws: `CancellationError` when the bag was already consumed.
        func consume() throws -> sending (AVAsset, AVAudioMix?, [String: Any], AVVideoComposition?, [String: Any]) {
            try lock.withLock {
                guard let asset = unsafeAsset else { throw CancellationError() }
                defer {
                    self.unsafeAsset = nil
                    self.unsafeAudioMix = nil
                    self.unsafeAudioOutputConfiguration = [:]
                    self.unsafeVideoComposition = nil
                    self.unsafeVideoOutputConfiguration = [:]
                }
                return (asset, unsafeAudioMix, unsafeAudioOutputConfiguration, unsafeVideoComposition, unsafeVideoOutputConfiguration)
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Created by Sami Samhuri on 2024-06-26. | |
// Based on SDAVAssetExportSession. | |
// | |
import AVFoundation | |
/// Pumps samples from an `AVAssetReader` into an `AVAssetWriter` to re-encode an asset,
/// reporting progress to a main-actor callback along the way.
///
/// The actor is isolated to its own serial dispatch queue (`queue`, via `unownedExecutor`),
/// so `requestMediaDataWhenReady(on: queue)` callbacks run on the actor's executor — that
/// is what makes the `assumeIsolated` call in `encodeReadySamples(type:)` valid.
actor VFASampleEncoder {
    /// Errors thrown during setup or while encoding.
    enum Error: LocalizedError, CustomStringConvertible {
        /// Reader/writer construction or configuration failed (e.g. missing dimensions,
        /// unsupported output settings).
        case setupFailure
        /// The asset reader failed; carries `AVAssetReader.error` when available.
        case readFailure((any Swift.Error)?)
        /// The asset writer failed; carries `AVAssetWriter.error` when available.
        case writeFailure((any Swift.Error)?)

        public var description: String {
            get {
                switch self {
                case .setupFailure:
                    return "technical mumbo jumbo"
                case .readFailure:
                    return "what did you feed me"
                case .writeFailure:
                    return "we're going nowhere"
                }
            }
        }

        public var errorDescription: String? {
            description
        }
    }

    /// Serial queue that doubles as this actor's executor and as the callback queue for
    /// `requestMediaDataWhenReady(on:)`.
    let queue = DispatchSerialQueue(label: "VFASampleEncoder", autoreleaseFrequency: .workItem, target: .global())

    /// Custom executor so actor isolation and `queue` are one and the same.
    nonisolated var unownedExecutor: UnownedSerialExecutor {
        queue.asUnownedSerialExecutor()
    }

    let reader: AVAssetReader
    let videoOutput: AVAssetReaderOutput
    // nil when the asset has no audio tracks.
    let audioOutput: AVAssetReaderOutput?
    let writer: AVAssetWriter
    let videoInput: AVAssetWriterInput
    // nil when the asset has no audio tracks.
    let audioInput: AVAssetWriterInput?
    let pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor
    /// Range of the source asset being exported; presentation times are re-based to its start.
    let timeRange: CMTimeRange
    /// Export duration in seconds, used as the denominator for progress.
    let duration: TimeInterval
    /// Progress callback, invoked on the main actor with values derived from the last
    /// sample's presentation time divided by `duration`.
    let updateProgress: @MainActor @Sendable (Float) -> Void

    // NOTE(review): written in encodeReadySamples but never read anywhere visible — possibly vestigial.
    private var progress: Float = 0.0
    /// Presentation time of the most recent sample, re-based to `timeRange.start`.
    private var lastSamplePresentationTime: CMTime = .zero

    /// Designated initializer taking fully-configured reader/writer objects.
    init(
        reader: AVAssetReader,
        audioOutput: AVAssetReaderOutput?,
        videoOutput: AVAssetReaderOutput,
        writer: AVAssetWriter,
        audioInput: AVAssetWriterInput?,
        videoInput: AVAssetWriterInput,
        pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor,
        timeRange: CMTimeRange,
        duration: TimeInterval,
        updateProgress: @escaping @MainActor @Sendable (Float) -> Void
    ) {
        self.reader = reader
        self.audioOutput = audioOutput
        self.videoOutput = videoOutput
        self.writer = writer
        self.audioInput = audioInput
        self.videoInput = videoInput
        self.pixelBufferAdaptor = pixelBufferAdaptor
        self.timeRange = timeRange
        self.duration = duration
        self.updateProgress = updateProgress
    }

    /// Convenience initializer that builds the reader, writer, outputs, and inputs from an
    /// asset plus output configuration.
    ///
    /// - Throws: `Error.setupFailure` when output dimensions cannot be determined or the
    ///   writer rejects the video output settings; rethrows reader/writer construction errors.
    init(
        asset: AVAsset,
        assetDuration: CMTime,
        audioTracks: [AVAssetTrack],
        audioMix: AVAudioMix?,
        audioOutputConfiguration: [String: Any],
        videoTracks: [AVAssetTrack],
        videoComposition: AVVideoComposition?,
        videoOutputConfiguration: [String: Any],
        outputURL: URL,
        fileType: AVFileType,
        timeRange: CMTimeRange = CMTimeRange(start: .zero, duration: .positiveInfinity),
        optimizeForNetworkUse: Bool = true,
        updateProgress: @escaping @MainActor @Sendable (Float) -> Void
    ) async throws {
        precondition(!videoTracks.isEmpty)
        // Output dimensions must come from the composition's render size or from the
        // video output configuration's width/height keys.
        guard let width = videoComposition.map({ Int($0.renderSize.width) })
            ?? (videoOutputConfiguration[AVVideoWidthKey] as? NSNumber)?.intValue,
            let height = videoComposition.map({ Int($0.renderSize.height) })
            ?? (videoOutputConfiguration[AVVideoHeightKey] as? NSNumber)?.intValue else {
            NSLog("Export dimensions must be provided in a video composition or video output configuration")
            throw Error.setupFailure
        }
        // Prefer an explicit, finite time range; otherwise fall back to the whole asset.
        let duration =
            if timeRange.duration.isValid && !timeRange.duration.isPositiveInfinity {
                timeRange.duration.seconds
            } else {
                assetDuration.seconds
            }
        let reader = try Self.setUpReader(asset: asset, timeRange: timeRange)
        let writer = try Self.setUpWriter(outputURL: outputURL, fileType: fileType, optimizeForNetworkUse: optimizeForNetworkUse)
        guard writer.canApply(outputSettings: videoOutputConfiguration, forMediaType: .video) else {
            throw Error.setupFailure
        }
        // videoSettings: nil → decompressed frames in a format chosen by the reader.
        let videoOutput = AVAssetReaderVideoCompositionOutput(videoTracks: videoTracks, videoSettings: nil)
        videoOutput.alwaysCopiesSampleData = false
        videoOutput.videoComposition = videoComposition
        // NOTE(review): canAdd failures here and below are silently ignored; the export
        // would then fail later with a less specific error. Consider throwing setupFailure.
        if reader.canAdd(videoOutput) {
            reader.add(videoOutput)
        }
        let videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoOutputConfiguration)
        if writer.canAdd(videoInput) {
            writer.add(videoInput)
        }
        // NOTE(review): kCVPixelFormatType_32RGBA is not commonly supported by pixel buffer
        // pools on Apple hardware (32BGRA is the usual choice) — TODO confirm the pool
        // actually vends buffers with this format. The OpenGLES IOSurface keys are legacy
        // string keys carried over from SDAVAssetExportSession.
        let pixelBufferAttributes: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(integerLiteral: Int(kCVPixelFormatType_32RGBA)),
            kCVPixelBufferWidthKey as String: NSNumber(integerLiteral: width),
            kCVPixelBufferHeightKey as String: NSNumber(integerLiteral: height),
            "IOSurfaceOpenGLESTextureCompatibility": NSNumber(booleanLiteral: true),
            "IOSurfaceOpenGLESFBOCompatibility": NSNumber(booleanLiteral: true),
        ]
        let pixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoInput, sourcePixelBufferAttributes: pixelBufferAttributes)
        // Audio is optional: only wire up an output/input pair when audio tracks exist.
        let audioOutput: AVAssetReaderOutput?
        let audioInput: AVAssetWriterInput?
        if audioTracks.isEmpty {
            audioOutput = nil
            audioInput = nil
        } else {
            let output = AVAssetReaderAudioMixOutput(audioTracks: audioTracks, audioSettings: nil)
            if reader.canAdd(output) {
                reader.add(output)
            }
            let input = AVAssetWriterInput(mediaType: AVMediaType.audio, outputSettings: audioOutputConfiguration)
            if writer.canAdd(input) {
                writer.add(input)
            }
            audioOutput = output
            audioInput = input
        }
        self.init(
            reader: reader,
            audioOutput: audioOutput,
            videoOutput: videoOutput,
            writer: writer,
            audioInput: audioInput,
            videoInput: videoInput,
            pixelBufferAdaptor: pixelBufferAdaptor,
            timeRange: timeRange,
            duration: duration,
            updateProgress: updateProgress
        )
    }

    /// Creates an `AVAssetReader` limited to `timeRange`.
    private static func setUpReader(asset: AVAsset, timeRange: CMTimeRange) throws -> AVAssetReader {
        let reader = try AVAssetReader(asset: asset)
        reader.timeRange = timeRange
        return reader
    }

    /// Creates an `AVAssetWriter` for the destination file.
    private static func setUpWriter(
        outputURL: URL,
        fileType: AVFileType,
        optimizeForNetworkUse: Bool
    ) throws -> AVAssetWriter {
        let writer = try AVAssetWriter(outputURL: outputURL, fileType: fileType)
        writer.shouldOptimizeForNetworkUse = optimizeForNetworkUse
        return writer
    }

    /// Runs the export: starts the reader and writer, pumps samples on `queue` until both
    /// audio and video are drained (or fail), then finishes or tears down the writer.
    func encode() async throws {
        // NOTE(review): the Bool results of startWriting()/startReading() are discarded;
        // failures are only surfaced later via the status checks in complete(completion:).
        writer.startWriting()
        reader.startReading()
        writer.startSession(atSourceTime: timeRange.start)
        return try await withCheckedThrowingContinuation { continuation in
            // Both requestMediaDataWhenReady callbacks run on the same serial queue, so
            // the flags are mutated in order and only the last media type to finish sees
            // both set — complete(...) and the continuation therefore run exactly once.
            let audioFinished = SendableWrapper(false)
            let videoFinished = SendableWrapper(false)
            @Sendable func checkFinished() {
                guard audioFinished.value, videoFinished.value else { return }
                Task {
                    await self.complete { result in
                        continuation.resume(with: result)
                    }
                }
            }
            if let audioInput {
                audioInput.requestMediaDataWhenReady(on: queue) {
                    let hasMoreSamples = self.encodeReadySamples(type: .audio)
                    if !hasMoreSamples {
                        audioFinished.value = true
                        checkFinished()
                    }
                }
            } else {
                // No audio track: mark it done up front so video alone drives completion.
                audioFinished.value = true
            }
            videoInput.requestMediaDataWhenReady(on: queue) {
                let hasMoreSamples = self.encodeReadySamples(type: .video)
                if !hasMoreSamples {
                    videoFinished.value = true
                    checkFinished()
                }
            }
        }
    }

    /// Maps final reader/writer status to the export's result: cancellation wins, then
    /// writer failure (cancelling the reader), then reader failure (cancelling the writer),
    /// otherwise finish the writer and report success.
    private func complete(completion: @escaping @Sendable (Result<Void, Swift.Error>) -> Void) {
        if reader.status == .cancelled || writer.status == .cancelled {
            completion(.failure(CancellationError()))
        } else if writer.status == .failed {
            reader.cancelReading()
            completion(.failure(Error.writeFailure(writer.error)))
        } else if reader.status == .failed {
            writer.cancelWriting()
            completion(.failure(Error.readFailure(reader.error)))
        } else {
            writer.finishWriting {
                completion(.success(()))
            }
        }
    }

    /// Bridges from the nonisolated requestMediaDataWhenReady callback back into actor
    /// isolation. Safe because those callbacks run on `queue`, the actor's executor.
    nonisolated private func encodeReadySamples(type: AVMediaType) -> Bool {
        assumeIsolated { _self in
            _self.actuallyEncodeReadySamples(type: type)
        }
    }

    /// Copies ready samples of `type` from the reader output to the writer input.
    ///
    /// - Returns: `true` when the input may become ready for more data later (keep the
    ///   requestMediaDataWhenReady callback alive); `false` when this media type is
    ///   finished — either drained successfully or aborted by a failure.
    private func actuallyEncodeReadySamples(type: AVMediaType) -> Bool {
        let output: AVAssetReaderOutput?
        let input: AVAssetWriterInput?
        switch type {
        case .audio:
            output = audioOutput
            input = audioInput
        case .video:
            output = videoOutput
            input = videoInput
        default:
            assertionFailure("Unsupported media type \(type)")
            return false
        }
        guard let output, let input else {
            assertionFailure("No output or input for media type \(type)")
            return false
        }
        while input.isReadyForMoreMediaData {
            // Stop when either side leaves its active state or the reader runs dry.
            guard reader.status == .reading && writer.status == .writing,
                let sampleBuffer = output.copyNextSampleBuffer() else {
                input.markAsFinished()
                NSLog("Finished encoding \(type) samples")
                return false
            }
            // Re-base the sample time to the export's start so progress starts at 0.
            lastSamplePresentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) - timeRange.start
            let progress = Float(lastSamplePresentationTime.seconds / duration)
            // Hop to the main queue to call the @MainActor progress callback.
            DispatchQueue.main.async { self.updateProgress(progress) }
            switch type {
            case .audio:
                guard input.append(sampleBuffer) else {
                    NSLog("Failed to append audio sample buffer \(sampleBuffer) to input \(input)")
                    return false
                }
            case .video:
                guard let pixelBufferPool = pixelBufferAdaptor.pixelBufferPool else {
                    NSLog("No pixel buffer pool available on adaptor \(pixelBufferAdaptor)")
                    return false
                }
                // NOTE(review): a fresh buffer is vended from the pool and appended without
                // anything being rendered into it. In SDAVAssetExportSession (which this is
                // based on) a renderer draws the frame into this buffer first; as written,
                // the pool path appears to append uninitialized pixels while the decoded
                // sampleBuffer is only used by the fallback below — verify intent.
                var toRenderBuffer: CVPixelBuffer?
                let result = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pixelBufferPool, &toRenderBuffer)
                var handled = false
                if result == kCVReturnSuccess, let toBuffer = toRenderBuffer {
                    handled = pixelBufferAdaptor.append(toBuffer, withPresentationTime: lastSamplePresentationTime)
                    if !handled { return false }
                }
                // Fallback: append the reader's sample buffer directly.
                if !handled {
                    guard input.append(sampleBuffer) else {
                        NSLog("Failed to append video sample buffer \(sampleBuffer) to input \(input)")
                        return false
                    }
                }
            default:
                // Unreachable: type was validated before the loop.
                assertionFailure("Unsupported media type: \(type)")
                return false
            }
        }
        // Everything was appended successfully, return true indicating there's more to do.
        NSLog("Completed encoding ready \(type) samples, more to come...")
        return true
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yeah I will. Thank you