Last active
January 31, 2024 18:44
-
-
Save banjun/5179290fce141c44c54f63f31acc34f5 to your computer and use it in GitHub Desktop.
specific window capture implementation memo for https://github.com/mzp/HeartVoice
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Cocoa | |
import CoreGraphics | |
import Vision | |
/// A window found in the window server's window list by owner name and window title.
struct TargetWindow {
    /// Window number read from the `kCGWindowNumber` entry.
    let id: CGWindowID
    /// Window bounds read from the `kCGWindowBounds` entry.
    let bounds: CGRect

    /// Finds the first listed window whose owner name and window name both match.
    ///
    /// - Parameters:
    ///   - appName: Expected value of the `kCGWindowOwnerName` entry.
    ///   - windowTitle: Expected value of the `kCGWindowName` entry.
    /// - Returns: `nil` when the window list cannot be read, no window matches,
    ///   or the matching entry has no usable number or bounds.
    init?(appName: String, windowTitle: String) {
        guard let entries = CGWindowListCopyWindowInfo(.optionAll, kCGNullWindowID) as? [[String: Any]] else {
            return nil
        }

        let ownerKey = kCGWindowOwnerName as String
        let titleKey = kCGWindowName as String
        let match = entries.first { entry in
            let owner = entry[ownerKey] as? String
            let title = entry[titleKey] as? String
            return owner == appName && title == windowTitle
        }

        guard let info = match,
              let number = info[kCGWindowNumber as String] as? Int,
              let boundsDictionary = info[kCGWindowBounds as String] as? NSDictionary,
              let frame = CGRect(dictionaryRepresentation: boundsDictionary) else {
            return nil
        }

        id = CGWindowID(number)
        bounds = frame
    }

    /// Captures this window (and only this window) as a bitmap.
    ///
    /// - Parameter relativeBounds: Region to capture, expressed relative to the window's
    ///   origin (`bounds.minX`, `bounds.minY`). When `nil`, `CGRect.null` is passed to
    ///   `CGWindowListCreateImage`.
    /// - Returns: The captured bitmap, or `nil` when no image could be created.
    func captureBitmap(relativeBounds: CGRect? = nil) -> NSBitmapImageRep? {
        let captureRect: CGRect
        if let relativeBounds = relativeBounds {
            // Shift the window-relative region by the window's own origin.
            captureRect = relativeBounds.offsetBy(dx: bounds.minX, dy: bounds.minY)
        } else {
            captureRect = .null
        }

        guard let cgImage = CGWindowListCreateImage(captureRect, .optionIncludingWindow, id, []) else {
            return nil
        }
        return NSBitmapImageRep(cgImage: cgImage)
    }

    /// Captures this window as an `NSImage` backed by the bitmap from `captureBitmap`.
    ///
    /// - Parameter relativeBounds: Forwarded unchanged to `captureBitmap(relativeBounds:)`.
    /// - Returns: An image sized like the bitmap, or `nil` when the capture fails.
    func capture(relativeBounds: CGRect? = nil) -> NSImage? {
        return captureBitmap(relativeBounds: relativeBounds).map { bitmap in
            let wrapped = NSImage(size: bitmap.size)
            wrapped.addRepresentation(bitmap)
            return wrapped
        }
    }
}
/// Once per second, captures a small region of the "Mac Blu-ray Player" window,
/// shows it in an image view, and logs the digits recognized in it as a
/// colon-separated string.
final class ViewController: NSViewController {
    /// Failures that abort digit recognition for one captured frame.
    private enum RecognitionError: Error {
        /// The 28x28 digit image could not be converted to a pixel buffer.
        case pixelBufferUnavailable
    }

    let imageView = NSImageView(frame: .zero)
    var timer: Timer?

    override func viewDidLoad() {
        super.viewDidLoad()
        imageView.autoresizingMask = [.width, .height]
        imageView.frame = view.bounds
        view.addSubview(imageView)
    }

    override func viewDidAppear() {
        super.viewDidAppear()
        // Never leave a previous timer running if the view appears again.
        timer?.invalidate()
        timer = Timer.scheduledTimer(withTimeInterval: 1, repeats: true) { [weak self] _ in
            self?.capture()
        }
    }

    override func viewWillDisappear() {
        super.viewWillDisappear()
        timer?.invalidate()
        timer = nil
    }

    /// Captures the target region, displays it, and logs the recognized position string.
    ///
    /// Returns silently when the player window is not found or the capture yields no
    /// bitmap. Text-detection and classification errors are logged instead of being dropped.
    func capture() {
        guard let player = TargetWindow(appName: "Mac Blu-ray Player", windowTitle: "Mac Blu-ray Player") else { return }

        // NOTE(review): presumably the playback-position readout of the player — confirm.
        let region = CGRect(x: 0, y: player.bounds.height - 12 - 64, width: 45, height: 12)
        let image = player.capture(relativeBounds: region)
        // Assigned even when nil so a failed capture clears the previous frame.
        imageView.image = image

        // A failed capture or a non-bitmap representation ends this tick instead of crashing.
        guard let sourceImage = image,
              let cgImage = (sourceImage.representations.first as? NSBitmapImageRep)?.cgImage else { return }

        let textReq = VNDetectTextRectanglesRequest { req, error in
            if let error = error {
                NSLog("%@", "Text detection failed: \(error)")
            }
            guard let observations = req.results as? [VNTextObservation] else { return }
            let characterBoxes = observations.flatMap { $0.characterBoxes ?? [] }
            do {
                let digits = try characterBoxes
                    // Keep only boxes taller than half of the captured image (normalized height).
                    .filter { abs($0.topLeft.y - $0.bottomRight.y) > 0.5 }
                    .map { try ViewController.classifyDigit(in: $0, of: sourceImage) }
                NSLog("%@", ViewController.positionString(from: digits))
            } catch {
                NSLog("%@", "Digit recognition failed: \(error)")
            }
        }
        textReq.reportCharacterBoxes = true

        do {
            try VNImageRequestHandler(cgImage: cgImage).perform([textReq])
        } catch {
            NSLog("%@", "Vision request failed: \(error)")
        }
    }

    /// Crops one character box out of `sourceImage`, centers it on a black 28x28 canvas,
    /// and classifies it with the `MNIST` model.
    ///
    /// - Parameters:
    ///   - box: Character box whose corner coordinates are scaled by the image size.
    ///   - sourceImage: Image the box was detected in.
    /// - Returns: The class label predicted by the model.
    /// - Throws: `RecognitionError.pixelBufferUnavailable` when the canvas cannot be
    ///   converted to a pixel buffer, or any error thrown by the model.
    private static func classifyDigit(in box: VNRectangleObservation, of sourceImage: NSImage) throws -> Int64 {
        let imageSize = sourceImage.size
        let cropOrigin = CGPoint(x: min(box.bottomRight.x, box.topLeft.x) * imageSize.width,
                                 y: min(box.bottomRight.y, box.topLeft.y) * imageSize.height)
        let cropSize = CGSize(width: abs(box.bottomRight.x - box.topLeft.x) * imageSize.width,
                              height: abs(box.bottomRight.y - box.topLeft.y) * imageSize.height)

        // NOTE(review): 28x28 is presumably the input size the MNIST model expects — confirm.
        let side: CGFloat = 28
        let canvas = NSImage(size: CGSize(width: side, height: side))
        canvas.lockFocus()
        NSColor.black.set()
        CGRect(origin: .zero, size: canvas.size).fill()
        sourceImage.draw(at: CGPoint(x: (side - cropSize.width) / 2, y: (side - cropSize.height) / 2),
                         from: CGRect(origin: cropOrigin, size: cropSize),
                         operation: .copy,
                         fraction: 1)
        canvas.unlockFocus()

        guard let pixelBuffer = canvas.pixelBuffer() else {
            throw RecognitionError.pixelBufferUnavailable
        }
        return try MNIST().prediction(input: MNISTInput(image: pixelBuffer)).classLabel
    }

    /// Groups digits into pairs from the right and joins the pairs with ":".
    ///
    /// A leading 0 is prepended first, so an odd digit count is padded
    /// (`[1, 2, 3]` gives "01:23") while an even count drops the extra 0
    /// (`[1, 2, 3, 4]` gives "12:34"). An empty input gives an empty string.
    private static func positionString(from digits: [Int64]) -> String {
        let reversedDigits = Array(([0] + digits).reversed())
        let pairs = stride(from: 0, to: reversedDigits.count - 1, by: 2)
            .map { (reversedDigits[$0 + 1], reversedDigits[$0]) }
            .reversed()
        return pairs.map { pair in "\(pair.0)\(pair.1)" }.joined(separator: ":")
    }
}
// https://gist.github.com/DennisWeidmann/7c4b4bb72062bd1a40c714aa5d95a0d7 | |
extension NSImage {
    /// Draws the image into a newly created 8-bit, single-component pixel buffer.
    ///
    /// The buffer's dimensions are the image's `size` truncated to whole numbers, and
    /// the image is drawn into a device-gray bitmap context backed by the buffer.
    ///
    /// - Returns: The filled pixel buffer, or `nil` when the buffer cannot be created
    ///   or locked, or when no bitmap context can be created over its memory.
    func pixelBuffer() -> CVPixelBuffer? {
        let pixelWidth = Int(size.width)
        let pixelHeight = Int(size.height)
        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                     kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary

        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                         pixelWidth,
                                         pixelHeight,
                                         kCVPixelFormatType_OneComponent8,
                                         attrs,
                                         &pixelBuffer)
        guard status == kCVReturnSuccess, let resultPixelBuffer = pixelBuffer else {
            return nil
        }

        let lockFlags = CVPixelBufferLockFlags(rawValue: 0)
        guard CVPixelBufferLockBaseAddress(resultPixelBuffer, lockFlags) == kCVReturnSuccess else {
            return nil
        }
        // Unlock on every exit path, including the early return when context creation fails.
        defer { CVPixelBufferUnlockBaseAddress(resultPixelBuffer, lockFlags) }

        guard let context = CGContext(data: CVPixelBufferGetBaseAddress(resultPixelBuffer),
                                      width: pixelWidth,
                                      height: pixelHeight,
                                      bitsPerComponent: 8,
                                      bytesPerRow: CVPixelBufferGetBytesPerRow(resultPixelBuffer),
                                      space: CGColorSpaceCreateDeviceGray(),
                                      bitmapInfo: CGImageAlphaInfo.none.rawValue) else {
            return nil
        }

        // The context is used unflipped; no vertical flip transform is applied.
        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = NSGraphicsContext(cgContext: context, flipped: false)
        draw(in: CGRect(x: 0, y: 0, width: size.width, height: size.height))
        NSGraphicsContext.restoreGraphicsState()

        return resultPixelBuffer
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment