mirror of
https://github.com/clawdbot/clawdbot.git
synced 2026-02-01 03:47:45 +01:00
Mac: finish Moltbot rename
This commit is contained in:
425
apps/macos/Sources/Moltbot/CameraCaptureService.swift
Normal file
425
apps/macos/Sources/Moltbot/CameraCaptureService.swift
Normal file
@@ -0,0 +1,425 @@
|
||||
import AVFoundation
|
||||
import MoltbotIPC
|
||||
import MoltbotKit
|
||||
import CoreGraphics
|
||||
import Foundation
|
||||
import OSLog
|
||||
|
||||
actor CameraCaptureService {
|
||||
struct CameraDeviceInfo: Encodable, Sendable {
|
||||
let id: String
|
||||
let name: String
|
||||
let position: String
|
||||
let deviceType: String
|
||||
}
|
||||
|
||||
enum CameraError: LocalizedError, Sendable {
|
||||
case cameraUnavailable
|
||||
case microphoneUnavailable
|
||||
case permissionDenied(kind: String)
|
||||
case captureFailed(String)
|
||||
case exportFailed(String)
|
||||
|
||||
var errorDescription: String? {
|
||||
switch self {
|
||||
case .cameraUnavailable:
|
||||
"Camera unavailable"
|
||||
case .microphoneUnavailable:
|
||||
"Microphone unavailable"
|
||||
case let .permissionDenied(kind):
|
||||
"\(kind) permission denied"
|
||||
case let .captureFailed(msg):
|
||||
msg
|
||||
case let .exportFailed(msg):
|
||||
msg
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private let logger = Logger(subsystem: "bot.molt", category: "camera")
|
||||
|
||||
func listDevices() -> [CameraDeviceInfo] {
|
||||
Self.availableCameras().map { device in
|
||||
CameraDeviceInfo(
|
||||
id: device.uniqueID,
|
||||
name: device.localizedName,
|
||||
position: Self.positionLabel(device.position),
|
||||
deviceType: device.deviceType.rawValue)
|
||||
}
|
||||
}
|
||||
|
||||
func snap(
|
||||
facing: CameraFacing?,
|
||||
maxWidth: Int?,
|
||||
quality: Double?,
|
||||
deviceId: String?,
|
||||
delayMs: Int) async throws -> (data: Data, size: CGSize)
|
||||
{
|
||||
let facing = facing ?? .front
|
||||
let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality)
|
||||
let maxWidth = normalized.maxWidth
|
||||
let quality = normalized.quality
|
||||
let delayMs = max(0, delayMs)
|
||||
let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
|
||||
try await self.ensureAccess(for: .video)
|
||||
|
||||
let session = AVCaptureSession()
|
||||
session.sessionPreset = .photo
|
||||
|
||||
guard let device = Self.pickCamera(facing: facing, deviceId: deviceId) else {
|
||||
throw CameraError.cameraUnavailable
|
||||
}
|
||||
|
||||
let input = try AVCaptureDeviceInput(device: device)
|
||||
guard session.canAddInput(input) else {
|
||||
throw CameraError.captureFailed("Failed to add camera input")
|
||||
}
|
||||
session.addInput(input)
|
||||
|
||||
let output = AVCapturePhotoOutput()
|
||||
guard session.canAddOutput(output) else {
|
||||
throw CameraError.captureFailed("Failed to add photo output")
|
||||
}
|
||||
session.addOutput(output)
|
||||
output.maxPhotoQualityPrioritization = .quality
|
||||
|
||||
session.startRunning()
|
||||
defer { session.stopRunning() }
|
||||
await Self.warmUpCaptureSession()
|
||||
await self.waitForExposureAndWhiteBalance(device: device)
|
||||
await self.sleepDelayMs(delayMs)
|
||||
|
||||
let settings: AVCapturePhotoSettings = {
|
||||
if output.availablePhotoCodecTypes.contains(.jpeg) {
|
||||
return AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg])
|
||||
}
|
||||
return AVCapturePhotoSettings()
|
||||
}()
|
||||
settings.photoQualityPrioritization = .quality
|
||||
|
||||
var delegate: PhotoCaptureDelegate?
|
||||
let rawData: Data = try await withCheckedThrowingContinuation { cont in
|
||||
let d = PhotoCaptureDelegate(cont)
|
||||
delegate = d
|
||||
output.capturePhoto(with: settings, delegate: d)
|
||||
}
|
||||
withExtendedLifetime(delegate) {}
|
||||
|
||||
let maxPayloadBytes = 5 * 1024 * 1024
|
||||
// Base64 inflates payloads by ~4/3; cap encoded bytes so the payload stays under 5MB (API limit).
|
||||
let maxEncodedBytes = (maxPayloadBytes / 4) * 3
|
||||
let res = try JPEGTranscoder.transcodeToJPEG(
|
||||
imageData: rawData,
|
||||
maxWidthPx: maxWidth,
|
||||
quality: quality,
|
||||
maxBytes: maxEncodedBytes)
|
||||
return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx))
|
||||
}
|
||||
|
||||
func clip(
|
||||
facing: CameraFacing?,
|
||||
durationMs: Int?,
|
||||
includeAudio: Bool,
|
||||
deviceId: String?,
|
||||
outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool)
|
||||
{
|
||||
let facing = facing ?? .front
|
||||
let durationMs = Self.clampDurationMs(durationMs)
|
||||
let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
|
||||
try await self.ensureAccess(for: .video)
|
||||
if includeAudio {
|
||||
try await self.ensureAccess(for: .audio)
|
||||
}
|
||||
|
||||
let session = AVCaptureSession()
|
||||
session.sessionPreset = .high
|
||||
|
||||
guard let camera = Self.pickCamera(facing: facing, deviceId: deviceId) else {
|
||||
throw CameraError.cameraUnavailable
|
||||
}
|
||||
let cameraInput = try AVCaptureDeviceInput(device: camera)
|
||||
guard session.canAddInput(cameraInput) else {
|
||||
throw CameraError.captureFailed("Failed to add camera input")
|
||||
}
|
||||
session.addInput(cameraInput)
|
||||
|
||||
if includeAudio {
|
||||
guard let mic = AVCaptureDevice.default(for: .audio) else {
|
||||
throw CameraError.microphoneUnavailable
|
||||
}
|
||||
let micInput = try AVCaptureDeviceInput(device: mic)
|
||||
guard session.canAddInput(micInput) else {
|
||||
throw CameraError.captureFailed("Failed to add microphone input")
|
||||
}
|
||||
session.addInput(micInput)
|
||||
}
|
||||
|
||||
let output = AVCaptureMovieFileOutput()
|
||||
guard session.canAddOutput(output) else {
|
||||
throw CameraError.captureFailed("Failed to add movie output")
|
||||
}
|
||||
session.addOutput(output)
|
||||
output.maxRecordedDuration = CMTime(value: Int64(durationMs), timescale: 1000)
|
||||
|
||||
session.startRunning()
|
||||
defer { session.stopRunning() }
|
||||
await Self.warmUpCaptureSession()
|
||||
|
||||
let tmpMovURL = FileManager().temporaryDirectory
|
||||
.appendingPathComponent("moltbot-camera-\(UUID().uuidString).mov")
|
||||
defer { try? FileManager().removeItem(at: tmpMovURL) }
|
||||
|
||||
let outputURL: URL = {
|
||||
if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||
return URL(fileURLWithPath: outPath)
|
||||
}
|
||||
return FileManager().temporaryDirectory
|
||||
.appendingPathComponent("moltbot-camera-\(UUID().uuidString).mp4")
|
||||
}()
|
||||
|
||||
// Ensure we don't fail exporting due to an existing file.
|
||||
try? FileManager().removeItem(at: outputURL)
|
||||
|
||||
let logger = self.logger
|
||||
var delegate: MovieFileDelegate?
|
||||
let recordedURL: URL = try await withCheckedThrowingContinuation { cont in
|
||||
let d = MovieFileDelegate(cont, logger: logger)
|
||||
delegate = d
|
||||
output.startRecording(to: tmpMovURL, recordingDelegate: d)
|
||||
}
|
||||
withExtendedLifetime(delegate) {}
|
||||
|
||||
try await Self.exportToMP4(inputURL: recordedURL, outputURL: outputURL)
|
||||
return (path: outputURL.path, durationMs: durationMs, hasAudio: includeAudio)
|
||||
}
|
||||
|
||||
private func ensureAccess(for mediaType: AVMediaType) async throws {
|
||||
let status = AVCaptureDevice.authorizationStatus(for: mediaType)
|
||||
switch status {
|
||||
case .authorized:
|
||||
return
|
||||
case .notDetermined:
|
||||
let ok = await withCheckedContinuation(isolation: nil) { cont in
|
||||
AVCaptureDevice.requestAccess(for: mediaType) { granted in
|
||||
cont.resume(returning: granted)
|
||||
}
|
||||
}
|
||||
if !ok {
|
||||
throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone")
|
||||
}
|
||||
case .denied, .restricted:
|
||||
throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone")
|
||||
@unknown default:
|
||||
throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone")
|
||||
}
|
||||
}
|
||||
|
||||
private nonisolated static func availableCameras() -> [AVCaptureDevice] {
|
||||
var types: [AVCaptureDevice.DeviceType] = [
|
||||
.builtInWideAngleCamera,
|
||||
.continuityCamera,
|
||||
]
|
||||
if let external = externalDeviceType() {
|
||||
types.append(external)
|
||||
}
|
||||
let session = AVCaptureDevice.DiscoverySession(
|
||||
deviceTypes: types,
|
||||
mediaType: .video,
|
||||
position: .unspecified)
|
||||
return session.devices
|
||||
}
|
||||
|
||||
private nonisolated static func externalDeviceType() -> AVCaptureDevice.DeviceType? {
|
||||
if #available(macOS 14.0, *) {
|
||||
return .external
|
||||
}
|
||||
// Use raw value to avoid deprecated symbol in the SDK.
|
||||
return AVCaptureDevice.DeviceType(rawValue: "AVCaptureDeviceTypeExternalUnknown")
|
||||
}
|
||||
|
||||
private nonisolated static func pickCamera(
|
||||
facing: CameraFacing,
|
||||
deviceId: String?) -> AVCaptureDevice?
|
||||
{
|
||||
if let deviceId, !deviceId.isEmpty {
|
||||
if let match = availableCameras().first(where: { $0.uniqueID == deviceId }) {
|
||||
return match
|
||||
}
|
||||
}
|
||||
let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back
|
||||
|
||||
if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) {
|
||||
return device
|
||||
}
|
||||
|
||||
// Many macOS cameras report `unspecified` position; fall back to any default.
|
||||
return AVCaptureDevice.default(for: .video)
|
||||
}
|
||||
|
||||
private nonisolated static func clampQuality(_ quality: Double?) -> Double {
|
||||
let q = quality ?? 0.9
|
||||
return min(1.0, max(0.05, q))
|
||||
}
|
||||
|
||||
nonisolated static func normalizeSnap(maxWidth: Int?, quality: Double?) -> (maxWidth: Int, quality: Double) {
|
||||
// Default to a reasonable max width to keep downstream payload sizes manageable.
|
||||
// If you need full-res, explicitly request a larger maxWidth.
|
||||
let maxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600
|
||||
let quality = Self.clampQuality(quality)
|
||||
return (maxWidth: maxWidth, quality: quality)
|
||||
}
|
||||
|
||||
private nonisolated static func clampDurationMs(_ ms: Int?) -> Int {
|
||||
let v = ms ?? 3000
|
||||
return min(60000, max(250, v))
|
||||
}
|
||||
|
||||
private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws {
|
||||
let asset = AVURLAsset(url: inputURL)
|
||||
guard let export = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else {
|
||||
throw CameraError.exportFailed("Failed to create export session")
|
||||
}
|
||||
export.shouldOptimizeForNetworkUse = true
|
||||
|
||||
if #available(macOS 15.0, *) {
|
||||
do {
|
||||
try await export.export(to: outputURL, as: .mp4)
|
||||
return
|
||||
} catch {
|
||||
throw CameraError.exportFailed(error.localizedDescription)
|
||||
}
|
||||
} else {
|
||||
export.outputURL = outputURL
|
||||
export.outputFileType = .mp4
|
||||
|
||||
try await withCheckedThrowingContinuation(isolation: nil) { (cont: CheckedContinuation<Void, Error>) in
|
||||
export.exportAsynchronously {
|
||||
cont.resume(returning: ())
|
||||
}
|
||||
}
|
||||
|
||||
switch export.status {
|
||||
case .completed:
|
||||
return
|
||||
case .failed:
|
||||
throw CameraError.exportFailed(export.error?.localizedDescription ?? "export failed")
|
||||
case .cancelled:
|
||||
throw CameraError.exportFailed("export cancelled")
|
||||
default:
|
||||
throw CameraError.exportFailed("export did not complete (\(export.status.rawValue))")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private nonisolated static func warmUpCaptureSession() async {
|
||||
// A short delay after `startRunning()` significantly reduces "blank first frame" captures on some devices.
|
||||
try? await Task.sleep(nanoseconds: 150_000_000) // 150ms
|
||||
}
|
||||
|
||||
private func waitForExposureAndWhiteBalance(device: AVCaptureDevice) async {
|
||||
let stepNs: UInt64 = 50_000_000
|
||||
let maxSteps = 30 // ~1.5s
|
||||
for _ in 0..<maxSteps {
|
||||
if !(device.isAdjustingExposure || device.isAdjustingWhiteBalance) {
|
||||
return
|
||||
}
|
||||
try? await Task.sleep(nanoseconds: stepNs)
|
||||
}
|
||||
}
|
||||
|
||||
private func sleepDelayMs(_ delayMs: Int) async {
|
||||
guard delayMs > 0 else { return }
|
||||
let ns = UInt64(min(delayMs, 10000)) * 1_000_000
|
||||
try? await Task.sleep(nanoseconds: ns)
|
||||
}
|
||||
|
||||
private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String {
|
||||
switch position {
|
||||
case .front: "front"
|
||||
case .back: "back"
|
||||
default: "unspecified"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate {
|
||||
private var cont: CheckedContinuation<Data, Error>?
|
||||
private var didResume = false
|
||||
|
||||
init(_ cont: CheckedContinuation<Data, Error>) {
|
||||
self.cont = cont
|
||||
}
|
||||
|
||||
func photoOutput(
|
||||
_ output: AVCapturePhotoOutput,
|
||||
didFinishProcessingPhoto photo: AVCapturePhoto,
|
||||
error: Error?)
|
||||
{
|
||||
guard !self.didResume, let cont else { return }
|
||||
self.didResume = true
|
||||
self.cont = nil
|
||||
if let error {
|
||||
cont.resume(throwing: error)
|
||||
return
|
||||
}
|
||||
guard let data = photo.fileDataRepresentation() else {
|
||||
cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("No photo data"))
|
||||
return
|
||||
}
|
||||
if data.isEmpty {
|
||||
cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("Photo data empty"))
|
||||
return
|
||||
}
|
||||
cont.resume(returning: data)
|
||||
}
|
||||
|
||||
func photoOutput(
|
||||
_ output: AVCapturePhotoOutput,
|
||||
didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings,
|
||||
error: Error?)
|
||||
{
|
||||
guard let error else { return }
|
||||
guard !self.didResume, let cont else { return }
|
||||
self.didResume = true
|
||||
self.cont = nil
|
||||
cont.resume(throwing: error)
|
||||
}
|
||||
}
|
||||
|
||||
private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate {
|
||||
private var cont: CheckedContinuation<URL, Error>?
|
||||
private let logger: Logger
|
||||
|
||||
init(_ cont: CheckedContinuation<URL, Error>, logger: Logger) {
|
||||
self.cont = cont
|
||||
self.logger = logger
|
||||
}
|
||||
|
||||
func fileOutput(
|
||||
_ output: AVCaptureFileOutput,
|
||||
didFinishRecordingTo outputFileURL: URL,
|
||||
from connections: [AVCaptureConnection],
|
||||
error: Error?)
|
||||
{
|
||||
guard let cont else { return }
|
||||
self.cont = nil
|
||||
|
||||
if let error {
|
||||
let ns = error as NSError
|
||||
if ns.domain == AVFoundationErrorDomain,
|
||||
ns.code == AVError.maximumDurationReached.rawValue
|
||||
{
|
||||
cont.resume(returning: outputFileURL)
|
||||
return
|
||||
}
|
||||
|
||||
self.logger.error("camera record failed: \(error.localizedDescription, privacy: .public)")
|
||||
cont.resume(throwing: error)
|
||||
return
|
||||
}
|
||||
|
||||
cont.resume(returning: outputFileURL)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user