Skip to content

Instantly share code, notes, and snippets.

@mitsuhiko
Created May 16, 2025 08:23
Show Gist options
  • Save mitsuhiko/fce80dc9a28f8f7333b6b48865de5955 to your computer and use it in GitHub Desktop.
Save mitsuhiko/fce80dc9a28f8f7333b6b48865de5955 to your computer and use it in GitHub Desktop.
This is a vibe-coded automation tool that speaks a basic JSON protocol to query application windows through the macOS Accessibility API.

Example query:

{
  "cmd": "query",
  "multi": true,
  "locator": {
    "app": "Safari",
    "role": "AXStaticText",
    "match": {},
    "pathHint": [
      "window[1]"
    ]
  },
  "attributes": [
    "AXRole",
    "AXTitle",
    "AXIdentifier",
    "AXActions",
    "AXPosition",
    "AXSize",
    "AXRoleDescription",
    "AXLabel",
    "AXTitleUIElement",
    "AXHelp"
  ],
  "requireAction": "AXPress"
}

Example response:

{
  "elements": [
    {
      "AXValue": "Hacker News",
      "AXSize": {
        "width": 148,
        "height": 24
      },
      "AXRole": "AXStaticText",
      "AXTitleUIElement": "Not available",
      "AXHelp": "",
      "AXActions": "Not available",
      "AXDescription": "",
      "AXIdentifier": "Not available",
      "AXLabel": "Not available",
      "AXRoleDescription": "text",
      "ComputedName": "Hacker News",
      "AXPosition": {
        "y": 415,
        "x": 2929
      },
      "AXTitle": ""
    },
    {
      "AXLabel": "Not available",
      "ComputedName": "new",
      "AXSize": {
        "height": 24,
        "width": 42
      },
      "AXActions": "Not available",
      "AXRoleDescription": "text",
      "AXHelp": "",
      "AXTitle": "",
      "AXDescription": "",
      "AXValue": "new",
      "AXTitleUIElement": "Not available",
      "AXPosition": {
        "x": 3090,
        "y": 415
      },
      "AXRole": "AXStaticText",
      "AXIdentifier": "Not available"
    },
    {
      "AXRole": "AXStaticText",
      "AXSize": {
        "height": 24,
        "width": 24
      },
      "AXActions": "Not available",
      "AXHelp": "",
      "AXLabel": "Not available",
      "AXTitleUIElement": "Not available",
      "ComputedName": " | ",
      "AXIdentifier": "Not available",
      "AXTitle": "",
      "AXPosition": {
        "x": 3131,
        "y": 415
      },
      "AXDescription": "",
      "AXRoleDescription": "text",
      "AXValue": " | "
    },
    {
      "AXRole": "AXStaticText",
      "AXHelp": "",
      "AXLabel": "Not available",
      "ComputedName": "past",
      "AXRoleDescription": "text",
      "AXActions": "Not available",
      "AXIdentifier": "Not available",
      "AXPosition": {
        "x": 3154,
        "y": 415
      },
      "AXDescription": "",
      "AXValue": "past",
      "AXSize": {
        "width": 44,
        "height": 24
      },
      "AXTitleUIElement": "Not available",
      "AXTitle": ""
    },
    {
      "AXSize": {
        "height": 24,
        "width": 24
      },
      "AXLabel": "Not available",
      "AXTitleUIElement": "Not available",
      "AXHelp": "",
      "AXDescription": "",
      "AXTitle": "",
      "AXActions": "Not available",
      "AXPosition": {
        "x": 3197,
        "y": 415
      },
      "AXValue": " | ",
      "AXRole": "AXStaticText",
      "AXRoleDescription": "text",
      "AXIdentifier": "Not available",
      "ComputedName": " | "
    },
    {
      "AXTitleUIElement": "Not available",
      "AXHelp": "",
      "AXSize": {
        "width": 106,
        "height": 24
      },
      "AXIdentifier": "Not available",
      "AXTitle": "",
      "AXDescription": "",
      "AXRole": "AXStaticText",
      "AXActions": "Not available",
      "AXPosition": {
        "y": 415,
        "x": 3220
      },
      "AXLabel": "Not available",
      "AXRoleDescription": "text",
      "AXValue": "comments",
      "ComputedName": "comments"
    },
    {
      "AXLabel": "Not available",
      "AXSize": {
        "width": 24,
        "height": 24
      },
      "ComputedName": " | ",
      "AXTitle": "",
      "AXRoleDescription": "text",
      "AXPosition": {
        "y": 415,
        "x": 3325
      },
      "AXDescription": "",
      "AXRole": "AXStaticText",
      "AXActions": "Not available",
      "AXIdentifier": "Not available",
      "AXHelp": "",
      "AXTitleUIElement": "Not available",
      "AXValue": " | "
    },
    {
      "AXSize": {
        "width": 35,
        "height": 24
      },
      "AXDescription": "",
      "AXHelp": "",
      "AXTitleUIElement": "Not available",
      "AXIdentifier": "Not available",
      "AXActions": "Not available",
      "AXLabel": "Not available",
      "ComputedName": "ask",
      "AXPosition": {
        "x": 3348,
        "y": 415
      },
      "AXRoleDescription": "text",
      "AXTitle": "",
      "AXRole": "AXStaticText",
      "AXValue": "ask"
    },
    {
      "AXLabel": "Not available",
      "AXActions": "Not available",
      "AXPosition": {
        "y": 415,
        "x": 3382
      },
      "AXValue": " | ",
      "AXTitleUIElement": "Not available",
      "AXIdentifier": "Not available",
      "AXSize": {
        "height": 24,
        "width": 24
      },
      "AXRoleDescription": "text",
      "AXDescription": "",
      "AXRole": "AXStaticText",
      "AXTitle": "",
      "AXHelp": "",
      "ComputedName": " | "
    },
    {
      "AXTitleUIElement": "Not available",
      "AXLabel": "Not available",
      "AXValue": "show",
      "AXRoleDescription": "text",
      "ComputedName": "show",
      "AXPosition": {
        "x": 3405,
        "y": 415
      },
      "AXActions": "Not available",
      "AXHelp": "",
      "AXIdentifier": "Not available",
      "AXDescription": "",
      "AXSize": {
        "width": 53,
        "height": 24
      },
      "AXRole": "AXStaticText",
      "AXTitle": ""
    },
    {
      "AXTitle": "",
      "AXSize": {
        "height": 24,
        "width": 24
      },
      "AXTitleUIElement": "Not available",
      "AXDescription": "",
      "AXRoleDescription": "text",
      "AXRole": "AXStaticText",
      "AXHelp": "",
      "AXIdentifier": "Not available",
      "AXActions": "Not available",
      "AXLabel": "Not available",
      "ComputedName": " | ",
      "AXPosition": {
        "x": 3457,
        "y": 415
      },
      "AXValue": " | "
    },
    {
      "AXActions": "Not available",
      "AXTitleUIElement": "Not available",
      "AXPosition": {
        "y": 415,
        "x": 3480
      },
      "AXIdentifier": "Not available",
      "AXTitle": "",
      "AXLabel": "Not available",
      "AXSize": {
        "width": 43,
        "height": 24
      },
      "ComputedName": "jobs",
      "AXRole": "AXStaticText",
      "AXRoleDescription": "text",
      "AXValue": "jobs",
      "AXHelp": "",
      "AXDescription": ""
    },
    {
      "AXHelp": "",
      "ComputedName": " | ",
      "AXTitleUIElement": "Not available",
      "AXIdentifier": "Not available",
      "AXDescription": "",
      "AXActions": "Not available",
      "AXSize": {
        "width": 24,
        "height": 24
      },
      "AXTitle": "",
      "AXRoleDescription": "text",
      "AXValue": " | ",
      "AXRole": "AXStaticText",
      "AXLabel": "Not available",
      "AXPosition": {
        "x": 3522,
        "y": 415
      }
    },
    {
      "AXRoleDescription": "text",
      "ComputedName": "submit",
      "AXDescription": "",
      "AXTitle": "",
      "AXRole": "AXStaticText",
      "AXLabel": "Not available",
      "AXSize": {
        "width": 69,
        "height": 24
      },
      "AXActions": "Not available",
      "AXValue": "submit",
      "AXPosition": {
        "x": 3545,
        "y": 415
      },
      "AXHelp": "",
      "AXTitleUIElement": "Not available",
      "AXIdentifier": "Not available"
    },
    {
      "AXPosition": {
        "y": 415,
        "x": 4279
      },
      "AXTitle": "",
      "AXIdentifier": "Not available",
      "AXRoleDescription": "text",
      "AXValue": "login",
      "AXLabel": "Not available",
      "AXDescription": "",
      "AXActions": "Not available",
      "ComputedName": "login",
      "AXTitleUIElement": "Not available",
      "AXHelp": "",
      "AXRole": "AXStaticText",
      "AXSize": {
        "height": 24,
        "width": 50
      }
    },
    {
      "AXRoleDescription": "text",
      "AXDescription": "",
      "AXPosition": {
        "x": 2901,
        "y": 460
      },
      "AXTitleUIElement": "Not available",
      "AXValue": "1.",
      "AXLabel": "Not available",
      "AXActions": "Not available",
      "AXHelp": "",
      "AXIdentifier": "Not available",
      "AXSize": {
        "width": 21,
        "height": 24
      },
      "AXTitle": "",
      "AXRole": "AXStaticText",
      "ComputedName": "1."
    },
    {
      "AXRoleDescription": "text",
      "AXActions": "Not available",
      "AXTitleUIElement": "Not available",
      "AXSize": {
        "width": 786,
        "height": 25
      },
      "ComputedName": "BuyMeACoffee silently dropped support for many countries, and nobody cares",
      "AXHelp": "",
      "AXIdentifier": "Not available",
      "AXTitle": "",
      "AXValue": "BuyMeACoffee silently dropped support for many countries, and nobody cares",
      "AXPosition": {
        "y": 462,
        "x": 2942
      },
      "AXDescription": "",
      "AXRole": "AXStaticText",
      "AXLabel": "Not available"
    },
    {
      "AXPosition": {
        "x": 3727,
        "y": 466
      },
      "AXTitle": "",
      "AXDescription": "",
      "AXActions": "Not available",
      "AXSize": {
        "width": 14,
        "height": 20
      },
      "AXTitleUIElement": "Not available",
      "AXIdentifier": "Not available",
      "AXValue": " (",
      "AXHelp": "",
      "AXRoleDescription": "text",
      "AXLabel": "Not available",
      "AXRole": "AXStaticText",
      "ComputedName": " ("
    },
    {
      "AXLabel": "Not available",
      "AXHelp": "",
      "AXRole": "AXStaticText",
      "AXTitleUIElement": "Not available",
      "AXActions": "Not available",
      "AXIdentifier": "Not available",
      "AXTitle": "",
      "AXPosition": {
        "y": 466,
        "x": 3740
      },
      "AXRoleDescription": "text",
      "AXSize": {
        "height": 20,
        "width": 106
      },
      "ComputedName": "zverok.space",
      "AXDescription": "",
      "AXValue": "zverok.space"
    },
    {
      "ComputedName": ")",
      "AXActions": "Not available",
      "AXRoleDescription": "text",
      "AXValue": ")",
      "AXSize": {
        "width": 8,
        "height": 20
      },
      "AXLabel": "Not available",
      "AXRole": "AXStaticText",
      "AXIdentifier": "Not available",
      "AXTitle": "",
      "AXHelp": "",
      "AXTitleUIElement": "Not available",
      "AXPosition": {
        "x": 3845,
        "y": 466
      },
      "AXDescription": ""
    }
  ]
}
import Foundation
import ApplicationServices // AXUIElement*
import AppKit // NSRunningApplication, NSWorkspace
import CoreGraphics // CGPoint, CGSize, etc.
// Accessibility attribute/action names used throughout this file, declared as plain Swift strings.
// NOTE(review): ApplicationServices appears to export kAXWindowsAttribute and kAXPressAction
// already; if so, these module-level definitions shadow the framework ones — confirm.
let kAXActionsAttribute = "AXActions"
let kAXWindowsAttribute = "AXWindows"
let kAXPressAction = "AXPress"
/// Master switch for diagnostic logging; `debug(_:)` is silent when this is `false`.
let DEBUG = true

/// Writes a diagnostic line to standard error when `DEBUG` is enabled.
///
/// - Parameter message: Text to log. It is prefixed with `DEBUG: ` and followed by a newline.
func debug(_ message: String) {
    guard DEBUG else { return }
    fputs("DEBUG: \(message)\n", stderr)
}
/// Verifies that this process is trusted for accessibility access and terminates it otherwise.
///
/// Calls `AXIsProcessTrustedWithOptions` with the `AXTrustedCheckOptionPrompt` option set to
/// `true`. When the process is not trusted, two explanatory lines are printed and the
/// process exits with status 1.
func checkAccessibilityPermissions() {
    // The option key is written as a literal rather than taken from the framework constant,
    // to avoid concurrency issues with that constant.
    let promptKey = "AXTrustedCheckOptionPrompt" as CFString
    let isTrusted = AXIsProcessTrustedWithOptions([promptKey: true] as CFDictionary)
    guard isTrusted else {
        print("Error: This application requires accessibility permissions.")
        print("Please enable them in System Preferences > Privacy & Security > Accessibility")
        exit(1)
    }
}
// MARK: - Codable command envelopes -------------------------------------------------
/// One decoded client request: the verb to run plus everything needed to run it.
struct CommandEnvelope: Codable {
    /// Operations a client can ask for.
    enum Verb: String, Codable {
        case query
        case perform
    }

    /// Requested operation.
    let cmd: Verb
    /// Identifies the target application and element.
    let locator: Locator
    /// Attribute names to read; used by `query`.
    let attributes: [String]?
    /// Action name to execute; used by `perform`.
    let action: String?
    /// Optional multi-result flag (the query handler logs a missing value as `false`).
    let multi: Bool?
    /// Optional action name, e.g. "AXPress".
    /// NOTE(review): presumably restricts matches to elements supporting that action — confirm against the handlers.
    let requireAction: String?
}
/// Describes which application to inspect and which element(s) inside it to match.
struct Locator: Codable {
    /// Bundle identifier or display name of the target application.
    let app: String
    /// Accessibility role to look for, e.g. "AXButton".
    let role: String
    /// Attribute name → expected string value; every pair must match.
    let match: [String: String]
    /// Optional navigation path such as ["window[1]", "toolbar[1]"].
    let pathHint: [String]?
}
// MARK: - Codable response types -----------------------------------------------------
/// Response payload holding the attributes of a single element.
struct QueryResponse: Codable {
    /// Attribute name → type-erased value.
    let attributes: [String: AnyCodable]

    /// Wraps every value of `attributes` in `AnyCodable` so the dictionary can be encoded.
    init(attributes: [String: Any]) {
        var wrapped: [String: AnyCodable] = [:]
        for (name, rawValue) in attributes {
            wrapped[name] = AnyCodable(rawValue)
        }
        self.attributes = wrapped
    }
}
/// Response payload holding the attributes of several elements.
struct MultiQueryResponse: Codable {
    /// One attribute dictionary per element, in the order supplied to the initializer.
    let elements: [[String: AnyCodable]]

    /// Wraps every value of every element dictionary in `AnyCodable`.
    init(elements: [[String: Any]]) {
        var wrappedElements: [[String: AnyCodable]] = []
        for element in elements {
            wrappedElements.append(element.mapValues { rawValue in AnyCodable(rawValue) })
        }
        self.elements = wrappedElements
    }
}
/// Response payload carrying a single status string.
struct PerformResponse: Codable {
    /// Status text, encoded under the `status` key.
    let status: String
}
/// Response payload carrying a single error message.
struct ErrorResponse: Codable {
    /// Error text, encoded under the `error` key.
    let error: String
}
/// Type-erased wrapper that lets heterogeneous `Any` values pass through `Codable` APIs.
///
/// Handles null (`NSNull`), `Bool`, `Int`, `Double`, `String`, arrays and string-keyed
/// dictionaries. When encoding, any other value is written as its `String(describing:)` text.
struct AnyCodable: Codable {
    /// The wrapped value.
    let value: Any

    /// Wraps `value` without inspecting it.
    init(_ value: Any) {
        self.value = value
    }

    /// Decodes a single JSON value, probing in a fixed order:
    /// null, Bool, Int, Double, String, array, dictionary.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when none of the supported shapes decodes.
    init(from decoder: Decoder) throws {
        let single = try decoder.singleValueContainer()

        if single.decodeNil() {
            self.value = NSNull()
            return
        }
        if let flag = try? single.decode(Bool.self) {
            self.value = flag
            return
        }
        if let integer = try? single.decode(Int.self) {
            self.value = integer
            return
        }
        if let real = try? single.decode(Double.self) {
            self.value = real
            return
        }
        if let text = try? single.decode(String.self) {
            self.value = text
            return
        }
        if let items = try? single.decode([AnyCodable].self) {
            // Unwrap one level so consumers see plain values, not wrappers
            self.value = items.map { $0.value }
            return
        }
        if let members = try? single.decode([String: AnyCodable].self) {
            self.value = members.mapValues { $0.value }
            return
        }

        throw DecodingError.dataCorruptedError(
            in: single,
            debugDescription: "AnyCodable cannot decode value"
        )
    }

    /// Encodes the wrapped value as a single JSON value.
    ///
    /// Unsupported types are not an error: they are encoded as a descriptive string.
    func encode(to encoder: Encoder) throws {
        var single = encoder.singleValueContainer()

        if value is NSNull {
            try single.encodeNil()
        } else if let flag = value as? Bool {
            try single.encode(flag)
        } else if let integer = value as? Int {
            try single.encode(integer)
        } else if let real = value as? Double {
            try single.encode(real)
        } else if let text = value as? String {
            try single.encode(text)
        } else if let items = value as? [Any] {
            try single.encode(items.map { AnyCodable($0) })
        } else if let members = value as? [String: Any] {
            try single.encode(members.mapValues { AnyCodable($0) })
        } else {
            // Fallback: textual description of whatever was wrapped
            try single.encode(String(describing: value))
        }
    }
}
/// Attribute name → extracted value; the dictionary shape returned by
/// `getElementAttributes(_:attributes:)` before it is wrapped for JSON encoding.
typealias ElementAttributes = [String: Any]
/// Reads an accessibility element's attributes into a JSON-friendly dictionary.
///
/// Every name in `attributes` appears in the result, as the string `"Not available"` when
/// it cannot be read. Attributes the element advertises whose names start with `AXTitle`,
/// `AXLabel`, `AXHelp`, `AXDescription`, `AXValue` or `AXRole` are included even when not
/// requested. Two derived keys may be added:
///
/// - `ComputedName`: the most descriptive text found (title, title element, value,
///   description, label, help, then role description).
/// - `IsClickable`: `true` when the role is `AXButton` or the element supports `AXPress`.
///
/// - Parameters:
///   - element: The accessibility element to inspect.
///   - attributes: Attribute names the caller explicitly wants in the result.
/// - Returns: Attribute name → value. Values are `String`, `Bool`, `Int`, `Double`,
///   `[String]` (actions) or small dictionaries (position, size, conversion errors).
func getElementAttributes(_ element: AXUIElement, attributes: [String]) -> ElementAttributes {
    // Attributes with these prefixes are reported even when the caller did not ask for them
    let alwaysReportedPrefixes = ["AXTitle", "AXLabel", "AXHelp", "AXDescription", "AXValue", "AXRole"]

    var result = ElementAttributes()
    let requested = Set(attributes)

    // Requested attributes first, then everything the element advertises, each name once
    var seen = Set<String>()
    var allAttributes = attributes.filter { seen.insert($0).inserted }
    var attrNames: CFArray?
    if AXUIElementCopyAttributeNames(element, &attrNames) == .success, let names = attrNames {
        let advertised = ((names as NSArray) as? [String]) ?? []
        allAttributes += advertised.filter { seen.insert($0).inserted }
        debug("Element has \(CFArrayGetCount(names)) available attributes")
    }

    // Keep track of all available actions
    var availableActions: [String] = []

    for attr in allAttributes {
        var value: CFTypeRef?
        let err = AXUIElementCopyAttributeValue(element, attr as CFString, &value)
        guard err == .success, let raw = value else {
            // Unreadable: only requested attributes get a placeholder
            if requested.contains(attr) {
                result[attr] = "Not available"
            }
            continue
        }

        let extractedValue: Any
        if attr == "AXActions", let actions = raw as? [String] {
            availableActions = actions
            extractedValue = actions
        } else {
            extractedValue = convertedAXAttributeValue(raw, attribute: attr)
        }

        if requested.contains(attr) || alwaysReportedPrefixes.contains(where: { attr.hasPrefix($0) }) {
            result[attr] = extractedValue
        }
    }

    // Elements often expose no readable "AXActions" attribute, so ask the dedicated
    // action-names API before concluding that the element has no actions.
    if availableActions.isEmpty {
        var actionNames: CFArray?
        if AXUIElementCopyActionNames(element, &actionNames) == .success, let names = actionNames {
            availableActions = ((names as NSArray) as? [String]) ?? []
        }
    }

    // Make sure actions are available as a proper array if requested
    if requested.contains("AXActions") {
        if !availableActions.isEmpty {
            result["AXActions"] = availableActions
        } else if result["AXActions"] == nil {
            result["AXActions"] = "Not available"
        }
    }

    if let name = computedDisplayName(from: result) {
        result["ComputedName"] = name
    }

    let isButton = result["AXRole"] as? String == "AXButton"
    if isButton || availableActions.contains("AXPress") {
        result["IsClickable"] = true
    }
    return result
}

/// Converts one raw accessibility attribute value into a JSON-encodable Swift value.
///
/// Core Foundation types cannot be conditionally cast in Swift, so each forced cast below
/// is guarded by a type-ID comparison first.
private func convertedAXAttributeValue(_ raw: CFTypeRef, attribute attr: String) -> Any {
    let typeID = CFGetTypeID(raw)

    if typeID == CFStringGetTypeID() {
        return (raw as! CFString) as String
    }

    if typeID == CFBooleanGetTypeID() {
        return CFBooleanGetValue((raw as! CFBoolean))
    }

    if typeID == CFNumberGetTypeID() {
        let number = raw as! CFNumber
        if CFNumberIsFloatType(number) {
            var real: Double = 0
            _ = CFNumberGetValue(number, .doubleType, &real)
            // Non-finite doubles cannot be written as JSON numbers
            if real.isFinite {
                return real
            }
            return "Number (conversion failed)"
        }
        // Read at full 64-bit width so negative and large values survive intact
        var integer: Int64 = 0
        if CFNumberGetValue(number, .sInt64Type, &integer) {
            return Int(integer)
        }
        return "Number (conversion failed)"
    }

    if typeID == CFArrayGetTypeID() {
        return "Array with \(CFArrayGetCount((raw as! CFArray))) elements"
    }

    if (attr == "AXPosition" || attr == "AXSize") && typeID == AXValueGetTypeID() {
        let boxed = raw as! AXValue
        let boxedType = AXValueGetType(boxed)
        if attr == "AXPosition" && boxedType == .cgPoint {
            var point = CGPoint.zero
            guard AXValueGetValue(boxed, .cgPoint, &point) else {
                return ["error": "Position data (conversion failed)"]
            }
            return ["x": Int(point.x), "y": Int(point.y)]
        }
        if attr == "AXSize" && boxedType == .cgSize {
            var size = CGSize.zero
            guard AXValueGetValue(boxed, .cgSize, &size) else {
                return ["error": "Size data (conversion failed)"]
            }
            return ["width": Int(size.width), "height": Int(size.height)]
        }
        return ["error": "AXValue type: \(boxedType.rawValue)"]
    }

    if (attr == "AXTitleUIElement" || attr == "AXLabelUIElement") && typeID == AXUIElementGetTypeID() {
        // These attributes point at another element; report that element's text instead
        let target = raw as! AXUIElement
        for textAttribute in ["AXValue", "AXTitle"] {
            var textValue: CFTypeRef?
            if AXUIElementCopyAttributeValue(target, textAttribute as CFString, &textValue) == .success,
               let text = textValue as? String {
                return text
            }
        }
        return "Title element (no extractable text)"
    }

    // Anything else: describe the Core Foundation type for debugging
    if let typeDesc = CFCopyTypeIDDescription(typeID) {
        return "Unknown type: \(typeDesc as String)"
    }
    return "Unknown type: \(typeID)"
}

/// Picks the most descriptive human-readable name from already-extracted attributes.
///
/// Preference order: `AXTitle`, `AXTitleUIElement`, `AXValue`, `AXDescription`, `AXLabel`,
/// `AXHelp`, and finally `"<role description> (<role>)"`. Empty strings and the
/// `"Not available"` placeholder are skipped.
private func computedDisplayName(from result: ElementAttributes) -> String? {
    func usableText(_ key: String) -> String? {
        guard let text = result[key] as? String, text != "Not available", !text.isEmpty else {
            return nil
        }
        return text
    }

    if let title = usableText("AXTitle") {
        return title
    }
    if let titleText = usableText("AXTitleUIElement"),
       titleText != "Title element (no extractable text)" {
        return titleText
    }
    for key in ["AXValue", "AXDescription", "AXLabel", "AXHelp"] {
        if let text = usableText(key) {
            return text
        }
    }
    if let roleDesc = result["AXRoleDescription"] as? String, roleDesc != "Not available" {
        // Use role description as a last resort
        let role = result["AXRole"] as? String ?? "Unknown"
        return "\(roleDesc) (\(role))"
    }
    return nil
}
// MARK: - Helpers --------------------------------------------------------------------
/// Failures raised while locating elements or performing actions.
enum AXErrorString: Error {
    /// Accessibility authorisation failed; carries the underlying `AXError`.
    case notAuthorised(AXError)
    /// Nothing matched the locator.
    case elementNotFound
    /// An action could not be performed; carries the underlying `AXError`.
    case actionFailed(AXError)
}

extension AXErrorString: CustomStringConvertible {
    /// Human-readable message for each failure.
    var description: String {
        switch self {
        case .notAuthorised(let underlying):
            return "AX authorisation failed: \(underlying)"
        case .elementNotFound:
            return "No element matches the locator"
        case .actionFailed(let underlying):
            return "Action failed: \(underlying)"
        }
    }
}
/// Resolves a running application's process identifier from a bundle id or display name.
///
/// Lookup order: a Safari shortcut (bundle id `com.apple.Safari`, then name) when `ident`
/// is exactly "Safari"; then `ident` as a bundle identifier; then an exact localized-name
/// match; then a case-insensitive localized-name match. When nothing matches, every
/// running application is listed through `debug` to help diagnose the miss.
///
/// - Parameter ident: Bundle identifier or localized application name.
/// - Returns: The PID of the first match, or `nil` when no running application matches.
func pid(forAppIdentifier ident: String) -> pid_t? {
    debug("Looking for app: \(ident)")

    // Handle Safari specifically - try both bundle ID and name
    if ident == "Safari" {
        debug("Special handling for Safari")
        let safariByBundle = NSRunningApplication.runningApplications(withBundleIdentifier: "com.apple.Safari")
        if let safari = safariByBundle.first {
            debug("Found Safari by bundle ID, PID: \(safari.processIdentifier)")
            return safari.processIdentifier
        }
        let safariByName = NSWorkspace.shared.runningApplications.first(where: { $0.localizedName == "Safari" })
        if let safari = safariByName {
            debug("Found Safari by name, PID: \(safari.processIdentifier)")
            return safari.processIdentifier
        }
    }

    if let bundleMatch = NSRunningApplication.runningApplications(withBundleIdentifier: ident).first {
        debug("Found by bundle ID: \(ident), PID: \(bundleMatch.processIdentifier)")
        return bundleMatch.processIdentifier
    }

    if let nameMatch = NSWorkspace.shared.runningApplications.first(where: { $0.localizedName == ident }) {
        debug("Found by name: \(ident), PID: \(nameMatch.processIdentifier)")
        return nameMatch.processIdentifier
    }

    // Last attempt: ignore letter case in the display name
    let wantedLowercased = ident.lowercased()
    let looseMatch = NSWorkspace.shared.runningApplications.first(where: {
        $0.localizedName?.lowercased() == wantedLowercased
    })
    if let looseMatch = looseMatch {
        debug("Found by case-insensitive name: \(ident), PID: \(looseMatch.processIdentifier)")
        return looseMatch.processIdentifier
    }

    // Print running applications to help debug
    debug("All running applications:")
    NSWorkspace.shared.runningApplications.forEach { candidate in
        debug(" - \(candidate.localizedName ?? "Unknown") (Bundle: \(candidate.bundleIdentifier ?? "Unknown"), PID: \(candidate.processIdentifier))")
    }
    debug("App not found: \(ident)")
    return nil
}
/// Fetches a single accessibility attribute and converts it to `T`.
///
/// Conversion never reinterprets memory: Core Foundation element and value types are
/// verified by type ID, and every other type goes through Swift's checked bridging cast
/// (for example `CFString` → `String`, `CFBoolean` → `Bool`, `CFArray` → `[String]`).
///
/// - Parameters:
///   - element: The element to read from.
///   - attr: Attribute name, e.g. "AXRole".
/// - Returns: The attribute value as `T`, or `nil` when the attribute cannot be read or
///   its value is not convertible to `T`.
func axValue<T>(of element: AXUIElement, attr: String) -> T? {
    var value: CFTypeRef?
    let err = AXUIElementCopyAttributeValue(element, attr as CFString, &value)
    guard err == .success, let raw = value else { return nil }

    // Action lists get their own logging; an empty list is returned as an empty array
    if attr == kAXActionsAttribute && T.self == [String].self {
        debug("Reading actions with special handling")
        guard let actionStrings = raw as? [String] else { return nil }
        if !actionStrings.isEmpty {
            debug("Found actions: \(actionStrings)")
        }
        return actionStrings as? T
    }

    // Arrays of elements: keep only entries that really are AXUIElements
    if T.self == [AXUIElement].self {
        guard let items = raw as? [AnyObject] else { return nil }
        let elements: [AXUIElement] = items.compactMap { item in
            CFGetTypeID(item) == AXUIElementGetTypeID() ? (item as! AXUIElement) : nil
        }
        return elements as? T
    }

    // Casts to Core Foundation types always "succeed" in Swift, so verify the type ID first
    if T.self == AXUIElement.self {
        guard CFGetTypeID(raw) == AXUIElementGetTypeID() else { return nil }
        return (raw as! AXUIElement) as? T
    }
    if T.self == AXValue.self {
        guard CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        return (raw as! AXValue) as? T
    }

    // Everything else: checked bridging cast, nil on mismatch
    return raw as? T
}
/// Depth-first search for the first element matching the locator's role and attributes.
///
/// A locator role of `"*"` or the empty string matches any role, mirroring `collectAll`.
/// Every pair in `locator.match` must equal the element's string attribute value.
///
/// - Parameters:
///   - element: Root of the subtree to search.
///   - locator: Role and attribute constraints.
///   - depth: Current recursion depth; callers normally leave this at 0.
///   - maxDepth: Deepest level that is still inspected. The same limit applies to the
///     whole recursion.
/// - Returns: The first matching element in pre-order, or `nil` when none is found.
func search(element: AXUIElement,
            locator: Locator,
            depth: Int = 0,
            maxDepth: Int = 30) -> AXUIElement? {
    guard depth <= maxDepth else { return nil }

    // Check role
    let acceptsAnyRole = locator.role == "*" || locator.role.isEmpty
    let role: String? = axValue(of: element, attr: kAXRoleAttribute as String)
    if acceptsAnyRole || role == locator.role {
        // Match all requested attributes; stops at the first mismatch
        let attributesMatch = locator.match.allSatisfy { expected in
            let actual: String? = axValue(of: element, attr: expected.key)
            return actual == expected.value
        }
        if attributesMatch { return element }
    }

    // Recurse into children, carrying the caller's depth limit along
    if let children: [AXUIElement] = axValue(of: element, attr: kAXChildrenAttribute as String) {
        for child in children {
            if let hit = search(element: child, locator: locator, depth: depth + 1, maxDepth: maxDepth) {
                return hit
            }
        }
    }
    return nil
}
/// Parses a path hint component such as `window[1]` into a role and a 0-based index.
///
/// The index in the hint is 1-based. The first `role[digits]` occurrence in `path` is used.
///
/// - Parameter path: Text containing a component of the form `role[n]` with `n >= 1`.
/// - Returns: The role and `n - 1`, or `nil` when no component is found, when `n` is 0,
///   or when the digits do not form a representable `Int`.
func parsePathComponent(_ path: String) -> (role: String, index: Int)? {
    let pattern = #"(\w+)\[(\d+)\]"#
    guard let regex = try? NSRegularExpression(pattern: pattern) else { return nil }
    let wholeRange = NSRange(path.startIndex..<path.endIndex, in: path)
    guard let match = regex.firstMatch(in: path, range: wholeRange),
          let roleRange = Range(match.range(at: 1), in: path),
          let indexRange = Range(match.range(at: 2), in: path),
          // Int(_:) rejects overflowing and non-ASCII digit runs instead of trapping
          let oneBasedIndex = Int(path[indexRange]),
          // Index 0 would turn into -1 and crash later array subscripts
          oneBasedIndex >= 1
    else { return nil }
    return (role: String(path[roleRange]), index: oneBasedIndex - 1) // Convert to 0-based index
}
/// Walks from `root` through each path-hint component and returns the element reached.
///
/// Each component has the form `role[n]` with a 1-based `n`. A `window` component indexes
/// the current element's `AXWindows` list. Any other component is resolved by first reading
/// an attribute named `AX<Role>` and, failing that, by filtering `AXChildren` on role.
/// The role filter is case-insensitive and accepts the role with or without its `AX`
/// prefix, so both `toolbar[1]` and `AXToolbar[1]` select children whose role is `AXToolbar`.
///
/// - Parameters:
///   - root: Element where navigation starts (typically the application element).
///   - pathHint: Components to follow, in order.
/// - Returns: The element at the end of the path, or `nil` when a component cannot be
///   parsed, has an out-of-range index, or matches nothing.
func navigateToElement(from root: AXUIElement, pathHint: [String]) -> AXUIElement? {
    var currentElement = root
    debug("Starting navigation with path hint: \(pathHint)")

    for (i, pathComponent) in pathHint.enumerated() {
        debug("Processing path component \(i+1)/\(pathHint.count): \(pathComponent)")
        guard let (role, index) = parsePathComponent(pathComponent) else {
            debug("Failed to parse path component: \(pathComponent)")
            return nil
        }
        debug("Parsed as role: \(role), index: \(index) (0-based)")

        // Path indices are 1-based, so a 0-based index below zero can never be valid;
        // reject it here instead of trapping on an array subscript.
        guard index >= 0 else {
            debug("Index out of bounds: \(index+1) < 1 for \(pathComponent)")
            return nil
        }

        // Special handling for window (direct access without complicated navigation)
        if role.lowercased() == "window" {
            debug("Special handling for window role")
            guard let windows: [AXUIElement] = axValue(of: currentElement, attr: kAXWindowsAttribute as String) else {
                debug("No windows found for application")
                return nil
            }
            debug("Found \(windows.count) windows")
            guard index < windows.count else {
                debug("Window index \(index+1) out of bounds (max: \(windows.count))")
                return nil
            }
            currentElement = windows[index]
            debug("Successfully navigated to window[\(index+1)]")
            continue
        }

        // Attribute/role name derived from the hint, e.g. "toolbar" -> "AXToolbar"
        let roleKey = "AX\(role.prefix(1).uppercased() + role.dropFirst())"
        debug("Looking for elements with role key: \(roleKey)")

        // First try to get children by specific role attribute
        if let roleSpecificChildren: [AXUIElement] = axValue(of: currentElement, attr: roleKey) {
            debug("Found \(roleSpecificChildren.count) elements with role \(roleKey)")
            guard index < roleSpecificChildren.count else {
                debug("Index out of bounds: \(index+1) > \(roleSpecificChildren.count) for \(pathComponent)")
                return nil
            }
            currentElement = roleSpecificChildren[index]
            debug("Successfully navigated to \(roleKey)[\(index+1)]")
            continue
        }
        debug("No elements found with specific role \(roleKey), trying with children")

        // If we can't find by specific role, try getting all children
        guard let allChildren: [AXUIElement] = axValue(of: currentElement, attr: kAXChildrenAttribute as String) else {
            debug("No children found for element at path component: \(pathComponent)")
            return nil
        }
        debug("Found \(allChildren.count) children, filtering by role: \(role)")

        // Element roles carry the "AX" prefix while hints usually omit it, so accept both spellings
        let acceptedRoles: Set<String> = [role.lowercased(), roleKey.lowercased()]
        let matchingChildren = allChildren.filter { child in
            guard let childRole: String = axValue(of: child, attr: kAXRoleAttribute as String) else {
                return false
            }
            let matches = acceptedRoles.contains(childRole.lowercased())
            if matches {
                debug("Found element with matching role: \(childRole)")
            }
            return matches
        }

        if matchingChildren.isEmpty {
            debug("No children with role '\(role)' found")
            // List available roles for debugging
            debug("Available roles among children:")
            for child in allChildren {
                if let childRole: String = axValue(of: child, attr: kAXRoleAttribute as String) {
                    debug(" - \(childRole)")
                }
            }
            return nil
        }
        debug("Found \(matchingChildren.count) children with role '\(role)'")

        guard index < matchingChildren.count else {
            debug("Index out of bounds: \(index+1) > \(matchingChildren.count) for \(pathComponent)")
            return nil
        }
        currentElement = matchingChildren[index]
        debug("Successfully navigated to \(role)[\(index+1)]")
    }

    debug("Path hint navigation completed successfully")
    return currentElement
}
/// Collects every element in a subtree that matches the locator's role and attributes.
///
/// Matching elements are appended to `hits` in pre-order. The search is bounded in three
/// ways: at most 100 hits in total, at most the first 20 children of any element are
/// explored, and elements deeper than `maxDepth` are not visited. Matches beyond those
/// bounds are silently left out.
///
/// - Parameters:
///   - element: Root of the subtree to scan.
///   - locator: Role (`"*"` or empty matches any role) and attribute constraints.
///   - requireAction: When non-nil, an element only matches if `elementSupportsAction`
///     reports that it supports this action.
///   - hits: Accumulator receiving the matching elements.
///   - depth: Current recursion depth; callers normally leave this at 0.
///   - maxDepth: Deepest level that is still tested for a match.
func collectAll(element: AXUIElement,
                locator: Locator,
                requireAction: String?,
                hits: inout [AXUIElement],
                depth: Int = 0,
                maxDepth: Int = 15) { // Reduce max depth to 15 for safety
    let maxHits = 100
    let maxChildrenPerElement = 20

    // Safety limit on matches: stop as soon as the cap is reached, never beyond it
    guard hits.count < maxHits else {
        debug("Safety limit of 100 matching elements reached, stopping search")
        return
    }
    guard depth <= maxDepth else {
        debug("Max depth (\(maxDepth)) reached")
        return
    }

    // role test
    let wildcardRole = locator.role == "*" || locator.role.isEmpty
    let elementRole: String? = axValue(of: element, attr: kAXRoleAttribute as String)
    let roleMatches = wildcardRole || elementRole == locator.role
    if wildcardRole {
        debug("Using wildcard role match (*) at depth \(depth)")
    } else if let role = elementRole {
        debug("Element role at depth \(depth): \(role), looking for: \(locator.role)")
    }

    if roleMatches {
        // attribute match
        var ok = true
        for (attr, want) in locator.match {
            let got: String? = axValue(of: element, attr: attr)
            if got != want {
                debug("Attribute mismatch at depth \(depth): \(attr)=\(got ?? "nil") (wanted \(want))")
                ok = false
                break
            }
        }

        // Check action requirement using safer method
        if ok, let required = requireAction {
            debug("Checking for required action: \(required) at depth \(depth)")
            if !elementSupportsAction(element, action: required) {
                debug("Element at depth \(depth) doesn't support \(required)")
                ok = false
            } else {
                debug("Element at depth \(depth) supports \(required)")
            }
        }

        if ok {
            debug("Found matching element at depth \(depth), role: \(elementRole ?? "unknown")")
            hits.append(element)
        }
    }

    // Only recurse into children if we're not at the max depth
    guard depth < maxDepth else { return }

    let children: [AXUIElement] = axValue(of: element, attr: kAXChildrenAttribute as String) ?? []

    // Limit the number of children explored per element
    let maxChildrenToProcess = min(children.count, maxChildrenPerElement)
    if children.count > maxChildrenToProcess {
        debug("Limiting processing to \(maxChildrenToProcess) of \(children.count) children at depth \(depth)")
    }

    guard !children.isEmpty else {
        debug("No children at depth \(depth)")
        return
    }

    debug("Found \(children.count) children to explore at depth \(depth)")
    for (i, child) in children.prefix(maxChildrenToProcess).enumerated() {
        if hits.count >= maxHits { break } // Safety check
        debug("Exploring child \(i+1)/\(maxChildrenToProcess) at depth \(depth)")
        collectAll(element: child, locator: locator, requireAction: requireAction,
                   hits: &hits, depth: depth + 1, maxDepth: maxDepth)
    }
}
// MARK: - Core verbs -----------------------------------------------------------------
/// Handles a `query` command: locates one or more accessibility elements and
/// returns their attributes.
///
/// The target application is resolved from `cmd.locator.app`. When
/// `cmd.locator.pathHint` is non-empty, the search starts from the element
/// reached by following the hint instead of from the application element.
///
/// Attribute selection: when the client supplies no attributes (`nil` or an
/// empty list), a default set is used and extended with every attribute name
/// reported by the start element. An explicit non-empty list is used as-is.
///
/// - Parameter cmd: The decoded command envelope.
/// - Returns: A `MultiQueryResponse` when `cmd.multi == true` (at most 20
///   elements, or at most 10 warning-tagged elements when none of the hits
///   support `cmd.requireAction`); otherwise a `QueryResponse` for the single
///   element found by `search`.
/// - Throws: `AXErrorString.elementNotFound` when the application cannot be
///   resolved, the path hint cannot be followed, or nothing matches.
func handleQuery(cmd: CommandEnvelope) throws -> Codable {
    debug("Processing query: \(cmd.cmd), app: \(cmd.locator.app), role: \(cmd.locator.role), multi: \(cmd.multi ?? false)")

    guard let pid = pid(forAppIdentifier: cmd.locator.app) else {
        debug("Failed to find app: \(cmd.locator.app)")
        throw AXErrorString.elementNotFound
    }

    debug("Creating application element for PID: \(pid)")
    let appElement = AXUIElementCreateApplication(pid)

    // Apply path hint if provided
    var startElement = appElement
    if let pathHint = cmd.locator.pathHint, !pathHint.isEmpty {
        debug("Path hint provided: \(pathHint)")
        guard let navigatedElement = navigateToElement(from: appElement, pathHint: pathHint) else {
            debug("Failed to navigate using path hint")
            throw AXErrorString.elementNotFound
        }
        startElement = navigatedElement
        debug("Successfully navigated to element using path hint")
    }

    // Attributes reported when the client does not ask for a specific set.
    let defaultAttributes = [
        "AXRole", "AXTitle", "AXIdentifier",
        "AXDescription", "AXValue", "AXHelp",
        "AXSubrole", "AXRoleDescription", "AXLabel",
        "AXActions", "AXPosition", "AXSize"
    ]

    // A missing list and an empty list both mean "no explicit selection", so
    // both start from the defaults (previously an empty list skipped them).
    let requestedAttributes = cmd.attributes ?? []
    let shouldExpandAttributes = requestedAttributes.isEmpty
    var attributesToQuery = shouldExpandAttributes ? defaultAttributes : requestedAttributes

    // If using default attributes, also include every attribute the starting
    // element reports.
    if shouldExpandAttributes {
        var attrNames: CFArray?
        // Bridge the CFArray to [String] rather than reinterpreting raw element
        // pointers with unsafeBitCast.
        if AXUIElementCopyAttributeNames(startElement, &attrNames) == .success,
           let names = attrNames as? [String] {
            for name in names where !attributesToQuery.contains(name) {
                attributesToQuery.append(name)
            }
            debug("Expanded to include \(attributesToQuery.count) attributes")
        }
    }

    // Handle multi-element query
    if cmd.multi == true {
        debug("Performing multi-element query")

        // Collect elements without the action requirement first, so that a
        // warning-tagged fallback can be returned when none support the action.
        var initialHits: [AXUIElement] = []
        collectAll(element: startElement, locator: cmd.locator,
                   requireAction: nil, hits: &initialHits)
        debug("Found \(initialHits.count) elements without action filter")

        let matchingElements: [AXUIElement]
        if let requiredAction = cmd.requireAction {
            debug("Filtering for action: \(requiredAction)")
            matchingElements = initialHits.filter { elementSupportsAction($0, action: requiredAction) }
            debug("After filtering, found \(matchingElements.count) elements with action: \(requiredAction)")

            // If no matches but we found elements, return a subset with warning
            if matchingElements.isEmpty && !initialHits.isEmpty {
                debug("Returning elements without required action")
                var resultArray: [ElementAttributes] = []
                for element in initialHits.prefix(10) {
                    var attributes = getElementAttributes(element, attributes: attributesToQuery)
                    attributes["_warning"] = "Element doesn't support \(requiredAction) action"
                    resultArray.append(attributes)
                }
                return MultiQueryResponse(elements: resultArray)
            }
        } else {
            // No action required, use all elements
            matchingElements = initialHits
        }

        debug("Processing final results")

        if matchingElements.isEmpty {
            debug("No elements matched criteria")
            throw AXErrorString.elementNotFound
        }

        // Hard limit of 20 elements per response.
        var resultArray: [ElementAttributes] = []
        for element in matchingElements.prefix(20) {
            let attributes = getElementAttributes(element, attributes: attributesToQuery)
            resultArray.append(attributes)
        }
        return MultiQueryResponse(elements: resultArray)
    }

    // Single element query (original behavior)
    guard let element = search(element: startElement, locator: cmd.locator) else {
        throw AXErrorString.elementNotFound
    }

    let attributes = getElementAttributes(element, attributes: attributesToQuery)
    return QueryResponse(attributes: attributes)
}
/// Handles a `perform` command: locates a single element and performs the
/// requested accessibility action on it.
///
/// The search is scoped the same way as in `handleQuery`: when
/// `cmd.locator.pathHint` is non-empty, the search starts from the element
/// reached by following the hint, so a locator that was used for a query
/// resolves against the same subtree when used for a perform.
///
/// - Parameter cmd: The decoded command envelope; `cmd.action` names the action.
/// - Returns: `PerformResponse(status: "ok")` when the action succeeds.
/// - Throws: `AXErrorString.elementNotFound` when the application cannot be
///   resolved, no action was supplied, the path hint cannot be followed, or no
///   element matches; `AXErrorString.actionFailed` when the Accessibility API
///   reports an error while performing the action.
func handlePerform(cmd: CommandEnvelope) throws -> PerformResponse {
    guard let pid = pid(forAppIdentifier: cmd.locator.app) else {
        debug("Failed to find app: \(cmd.locator.app)")
        throw AXErrorString.elementNotFound
    }
    guard let action = cmd.action else {
        debug("Perform command is missing an action")
        throw AXErrorString.elementNotFound
    }

    let appElement = AXUIElementCreateApplication(pid)

    // Honor the path hint, mirroring handleQuery.
    var startElement = appElement
    if let pathHint = cmd.locator.pathHint, !pathHint.isEmpty {
        guard let navigatedElement = navigateToElement(from: appElement, pathHint: pathHint) else {
            debug("Failed to navigate using path hint")
            throw AXErrorString.elementNotFound
        }
        startElement = navigatedElement
    }

    guard let element = search(element: startElement, locator: cmd.locator) else {
        throw AXErrorString.elementNotFound
    }

    let err = AXUIElementPerformAction(element, action as CFString)
    guard err == .success else {
        throw AXErrorString.actionFailed(err)
    }
    return PerformResponse(status: "ok")
}
// MARK: - Main loop ------------------------------------------------------------------
// JSON coders shared by every request handled below.
let decoder = JSONDecoder()
let encoder = JSONEncoder()
if #available(macOS 10.15, *) {
    encoder.outputFormatting = [.withoutEscapingSlashes]
}

// Check for accessibility permissions before starting
checkAccessibilityPermissions()

// Request loop: each stdin line is decoded as one command. A successful reply
// is written to stdout as one JSON line; any failure is written to stderr.
while let line = readLine(strippingNewline: true) {
    let lineBreak = Data("\n".utf8)
    do {
        let cmd = try decoder.decode(CommandEnvelope.self, from: Data(line.utf8))

        // Encode the handler result first; nothing is written if either the
        // handler or the encoder throws.
        let reply: Data
        switch cmd.cmd {
        case .query:
            let result = try handleQuery(cmd: cmd)
            reply = try encoder.encode(result)
        case .perform:
            let status = try handlePerform(cmd: cmd)
            reply = try encoder.encode(status)
        }

        FileHandle.standardOutput.write(reply)
        FileHandle.standardOutput.write(lineBreak)
    } catch {
        let errorResponse = ErrorResponse(error: "\(error)")
        guard let errorData = try? encoder.encode(errorResponse) else {
            // Last resort when the error response itself cannot be encoded.
            fputs("{\"error\":\"\(error)\"}\n", stderr)
            continue
        }
        FileHandle.standardError.write(errorData)
        FileHandle.standardError.write(lineBreak)
    }
}
/// Reports whether `element` advertises the accessibility action named `action`.
///
/// The previous implementation returned `true` for any element with at least
/// one action, so filtering on a required action never tested that action.
/// This version checks the action list for the requested name.
///
/// - Parameters:
///   - element: The accessibility element to inspect.
///   - action: The action name to look for (for example `"AXPress"`).
/// - Returns: `true` only when the element's action names were read
///   successfully and contain `action`; `false` on any API error, when no
///   action list is returned, or when the action is not in the list.
func elementSupportsAction(_ element: AXUIElement, action: String) -> Bool {
    var actionNames: CFArray?
    let err = AXUIElementCopyActionNames(element, &actionNames)
    guard err == .success else {
        debug("Failed to get action names: \(err)")
        return false
    }

    // Bridge the CFArray to a Swift array instead of reading raw element
    // pointers; the conditional cast fails cleanly on unexpected contents.
    guard let actions = actionNames as? [String] else {
        debug("No actions array")
        return false
    }

    debug("Element has \(actions.count) actions")

    let isSupported = actions.contains(action)
    debug(isSupported ? "Element supports \(action)" : "Element does not support \(action)")
    return isSupported
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment