Skip to content

Instantly share code, notes, and snippets.

@UlisseMini
Created October 14, 2024 02:34
Show Gist options
  • Save UlisseMini/3924c48781a4cd958d99336596dbdded to your computer and use it in GitHub Desktop.
Save UlisseMini/3924c48781a4cd958d99336596dbdded to your computer and use it in GitHub Desktop.
Use the macOS accessibility API to draw rectangles around all buttons on the screen
import AppKit
import ApplicationServices
import Cocoa
import CoreGraphics.CGGeometry
import CoreServices
import Foundation
/// Reports whether an accessibility window should be treated as visible.
///
/// The only signal consulted is the window's `kAXMinimizedAttribute`.
///
/// - Parameter window: The accessibility element to query.
/// - Returns: `false` only when the minimized attribute is read successfully
///   as `true`; `true` in every other case, including a failed query.
func isWindowVisible(_ window: AXUIElement) -> Bool {
    var minimizedRef: CFTypeRef?
    let status = AXUIElementCopyAttributeValue(
        window, kAXMinimizedAttribute as CFString, &minimizedRef)

    guard status == .success, let isMinimized = minimizedRef as? Bool else {
        // The state could not be determined, so err on the side of "visible".
        return true
    }
    return !isMinimized
}
/// Collects button metadata from the windows of every running application.
///
/// For each application reported by `NSWorkspace`, the accessibility window
/// list is queried and every window that passes `isWindowVisible(_:)` is
/// searched with `getButtonMetadata(window:)`. A line is printed for each
/// window (or hidden application) stating whether it was considered visible.
///
/// - Returns: One `(description, frame, attributes)` tuple per button found,
///   excluding entries whose frame is empty (zero width or height).
func getAllButtonMetadata() -> [(String, CGRect, [String: String])] {
    var buttonMetadata: [(String, CGRect, [String: String])] = []

    for app in NSWorkspace.shared.runningApplications {
        let appName = app.localizedName ?? "Unknown App"

        // `isWindowVisible` only inspects the per-window minimized flag, which
        // says nothing about whether the owning application is hidden. Skip
        // hidden applications here so their buttons are not collected.
        guard !app.isHidden else {
            print("\(appName) is not visible")
            continue
        }

        let appRef = AXUIElementCreateApplication(app.processIdentifier)
        var windowsRef: CFTypeRef?
        let result = AXUIElementCopyAttributeValue(
            appRef, kAXWindowsAttribute as CFString, &windowsRef)

        // Applications whose window list cannot be read are silently skipped.
        guard result == .success, let windows = windowsRef as? [AXUIElement] else {
            continue
        }

        for window in windows {
            if isWindowVisible(window) {
                print("\(appName) is visible")
                buttonMetadata += getButtonMetadata(window: window)
            } else {
                print("\(appName) is not visible")
            }
        }
    }

    // Buttons with an empty frame cannot be outlined, so drop them.
    return buttonMetadata.filter { !$0.1.isEmpty }
}
/// Recursively collects every `AXButton` found beneath an accessibility element.
///
/// - Parameter window: Root element to search. Despite the label, any
///   accessibility element is accepted: the function calls itself on each child.
/// - Returns: One tuple per button, in depth-first order:
///   - the button's `AXDescription` string, or `"Unknown"` when it has none;
///   - its frame, built from the `AXPosition` and `AXSize` attributes;
///   - a dictionary holding whichever of `AXRole`, `AXDescription`, `AXHelp`
///     and `AXValue` could be read (non-string values are stored as `"nil"`).
///
///   Buttons whose position or size cannot be read are omitted, because a
///   frame with a made-up origin would be reported at the wrong place.
///   An element whose children cannot be read contributes nothing.
func getButtonMetadata(window: AXUIElement) -> [(String, CGRect, [String: String])] {
    guard let children = axAttributeValue(window, kAXChildrenAttribute) as? [AXUIElement] else {
        return []
    }

    var buttonMetadata: [(String, CGRect, [String: String])] = []
    for child in children {
        if let entry = axButtonEntry(for: child) {
            buttonMetadata.append(entry)
        }
        // Descend regardless of the child's own role.
        buttonMetadata += getButtonMetadata(window: child)
    }
    return buttonMetadata
}

/// Copies one accessibility attribute, returning `nil` unless the query succeeds.
private func axAttributeValue(_ element: AXUIElement, _ attribute: String) -> CFTypeRef? {
    var value: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &value) == .success else {
        return nil
    }
    return value
}

/// Reads an element's position and size; `nil` if either is missing or cannot be decoded.
private func axFrame(of element: AXUIElement) -> CGRect? {
    guard
        let positionRef = axAttributeValue(element, kAXPositionAttribute),
        let sizeRef = axAttributeValue(element, kAXSizeAttribute),
        CFGetTypeID(positionRef) == AXValueGetTypeID(),
        CFGetTypeID(sizeRef) == AXValueGetTypeID()
    else {
        return nil
    }

    var origin = CGPoint.zero
    var size = CGSize.zero
    // The type IDs were verified above, so these force casts cannot fail.
    guard
        AXValueGetValue(positionRef as! AXValue, .cgPoint, &origin),
        AXValueGetValue(sizeRef as! AXValue, .cgSize, &size)
    else {
        return nil
    }
    return CGRect(origin: origin, size: size)
}

/// Builds the metadata tuple for `element` if it is a button with readable geometry.
private func axButtonEntry(for element: AXUIElement) -> (String, CGRect, [String: String])? {
    guard
        let role = axAttributeValue(element, kAXRoleAttribute) as? String,
        role == kAXButtonRole,
        let frame = axFrame(of: element)
    else {
        return nil
    }

    // The role is already known, so record it without querying it again.
    var metadata: [String: String] = ["AXRole": role]

    // The description serves both as the label and as a metadata entry; fetch it once.
    let description = axAttributeValue(element, kAXDescriptionAttribute)
    if let description = description {
        metadata["AXDescription"] = description as? String ?? "nil"
    }

    for attribute in ["AXHelp", "AXValue"] {
        if let value = axAttributeValue(element, attribute) {
            metadata[attribute] = value as? String ?? "nil"
        }
    }

    return (description as? String ?? "Unknown", frame, metadata)
}
/// Captures the display identified by `CGMainDisplayID()`.
///
/// - Returns: An `NSImage` whose size equals the captured image's pixel
///   dimensions, or `nil` when `CGDisplayCreateImage` produces no image.
func takeScreenshot() -> NSImage? {
    guard let capturedImage = CGDisplayCreateImage(CGMainDisplayID()) else {
        return nil
    }
    let pixelSize = NSSize(
        width: CGFloat(capturedImage.width),
        height: CGFloat(capturedImage.height))
    return NSImage(cgImage: capturedImage, size: pixelSize)
}
/// Draws an outline and a text label for each button on top of a screenshot.
///
/// Button frames are interpreted relative to the bounds of the display
/// returned by `CGMainDisplayID()`, then scaled to the screenshot's size and
/// flipped vertically for drawing.
///
/// - Parameters:
///   - screenshot: The image to annotate. It is not modified.
///   - buttons: `(description, frame, attributes)` tuples. Only the description
///     and frame are used. Frames that do not intersect the main display's
///     bounds are skipped.
/// - Returns: A new image of the same size with the annotations drawn on it.
///   If the screenshot or the display bounds have a zero dimension, no scale
///   factor can be computed and `screenshot` is returned unchanged.
func overlayButtonsOnScreenshot(screenshot: NSImage, buttons: [(String, CGRect, [String: String])])
    -> NSImage
{
    // Use the bounds of the same display ID the screenshot helper captures,
    // rather than `NSScreen.main`, which may refer to a different screen.
    let displayBounds = CGDisplayBounds(CGMainDisplayID())
    let imageSize = screenshot.size

    // Avoid dividing by zero (which would yield inf/NaN coordinates) and
    // avoid locking focus on an empty image.
    guard imageSize.width > 0, imageSize.height > 0,
        displayBounds.width > 0, displayBounds.height > 0
    else {
        return screenshot
    }

    let image = NSImage(size: imageSize)
    image.lockFocus()
    screenshot.draw(in: NSRect(origin: .zero, size: imageSize))

    let context = NSGraphicsContext.current?.cgContext
    context?.setLineWidth(2)
    let darkRed = NSColor(calibratedRed: 0.7, green: 0, blue: 0, alpha: 1.0)
    context?.setStrokeColor(darkRed.cgColor)

    let font = NSFont.boldSystemFont(ofSize: 12)
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .foregroundColor: darkRed,
        .strokeColor: NSColor.white,
        .strokeWidth: -1.0,  // Negative value: stroke and fill the glyphs
    ]

    // Scale from display units to screenshot units.
    let scaleX = imageSize.width / displayBounds.width
    let scaleY = imageSize.height / displayBounds.height

    for (description, frame, _) in buttons {
        // Buttons that lie entirely outside the captured display have nothing
        // to annotate; drawing them would only leave stray labels at the edges.
        guard frame.intersects(displayBounds) else { continue }

        // Convert to image coordinates. The vertical flip maps the button's
        // bottom edge to `imageY`, measured upward from the image's bottom.
        let imageX = (frame.minX - displayBounds.minX) * scaleX
        let imageY = (displayBounds.maxY - frame.maxY) * scaleY
        let imageWidth = frame.size.width * scaleX
        let imageHeight = frame.size.height * scaleY

        // Draw rectangle (stroke only, no fill)
        let rect = NSRect(x: imageX, y: imageY, width: imageWidth, height: imageHeight)
        context?.stroke(rect)

        // Label text: description, original position, and original size.
        let buttonDescription =
            "\(description)\n(\(Int(frame.origin.x)), \(Int(frame.origin.y)))\n\(Int(frame.size.width))x\(Int(frame.size.height))"
        let attrString = NSAttributedString(string: buttonDescription, attributes: attributes)
        let textSize = attrString.size()

        // Default placement: just below the button (smaller y is lower in the image).
        var textX = imageX + 5
        var textY = imageY - textSize.height - 5
        if textX + textSize.width > imageSize.width {
            // Would run past the right edge: shift the label to the button's left.
            textX = imageX - textSize.width - 5
        }
        if textY < 0 {
            // Would fall off the bottom edge: place the label above the button instead.
            textY = imageY + imageHeight + 5
        }
        // Never start the label left of the image.
        textX = max(textX, 0)

        attrString.draw(at: NSPoint(x: textX, y: textY))
    }

    image.unlockFocus()
    return image
}
/// Encodes an image as PNG and writes it to a file.
///
/// Failures are reported with `print` and are not propagated to the caller.
///
/// - Parameters:
///   - image: The image to encode.
///   - path: Destination file path.
func saveImage(_ image: NSImage, to path: String) {
    // TIFF data -> bitmap representation -> PNG data; any failed step yields nil.
    let encoded: Data? = image.tiffRepresentation
        .flatMap { NSBitmapImageRep(data: $0) }
        .flatMap { $0.representation(using: .png, properties: [:]) }

    guard let pngData = encoded else {
        print("Failed to create PNG data.")
        return
    }

    let destination = URL(fileURLWithPath: path)
    do {
        try pngData.write(to: destination)
        print("Screenshot saved to: \(path)")
    } catch {
        print("Failed to save screenshot: \(error)")
    }
}
/// Runs the whole pipeline: collect button metadata, take a screenshot,
/// draw the buttons onto it, save the result as a PNG, and open that file.
///
/// Progress is reported with `print`. The function returns early, without
/// saving anything, if no screenshot could be taken.
func main() {
    // Every accessibility query fails without this permission. Without a
    // warning, the only symptom would be a screenshot with no annotations.
    if !AXIsProcessTrusted() {
        print(
            "Warning: this process is not trusted for Accessibility access; "
                + "no buttons will be found until access is granted.")
    }

    print("Getting all button metadata...")
    let buttons = getAllButtonMetadata()

    print("Taking screenshot...")
    guard let screenshot = takeScreenshot() else {
        print("Failed to take screenshot.")
        return
    }

    print("Overlaying buttons on screenshot...")
    let overlaidImage = overlayButtonsOnScreenshot(screenshot: screenshot, buttons: buttons)

    print("Saving screenshot...")
    // Relative path: the file is written to the current working directory.
    let imagePath = "./all_buttons_overlay.png"
    saveImage(overlaidImage, to: imagePath)

    // Open the saved image
    NSWorkspace.shared.open(URL(fileURLWithPath: imagePath))
}
// Script entry point: gather button metadata, take a screenshot, overlay the
// button info, save the image, and open it.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment