Last active
July 12, 2024 07:39
-
-
Save snowzurfer/1e90678d0d23d3295dda9a0cc93b2453 to your computer and use it in GitHub Desktop.
3D world points from ARKit depth
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ARKit | |
import SceneKit | |
/// Number of depth samples taken along the depth map's x axis (every second
/// pixel of a 256-pixel-wide map — assumes ARKit's 256 x 192 depth resolution).
let horizontalPoints: Int = 256 / 2
/// Number of depth samples taken along the depth map's y axis (every second
/// pixel of a 192-pixel-tall map — same assumption as above).
let verticalPoints: Int = 192 / 2
/// One debug marker per depth sample, stored at index `v * horizontalPoints + h`.
var depthNodes: [SCNNode] = []
/// Container node that owns every debug marker, so they can be managed as a group.
var parentDebugNodes: SCNNode = SCNNode()
/// The AR view whose session is run and whose scene hosts the markers.
/// Must be assigned before `setup()` is called.
var sceneView: ARSCNView!
// Somewhere during setup | |
/// Starts world tracking (with smoothed scene depth when the device supports it)
/// and builds one small green box per depth sample point.
///
/// The boxes are parented under `parentDebugNodes` and stored in `depthNodes`
/// so that `session(_:didUpdate:)` can reposition them on every frame.
func setup() {
    let configuration = ARWorldTrackingConfiguration()

    // Scene depth is only available on some devices. ARKit raises an exception
    // when unsupported frame semantics are assigned, so check support first.
    let depthSemantics: ARConfiguration.FrameSemantics = .smoothedSceneDepth
    let supportsDepth = ARWorldTrackingConfiguration.supportsFrameSemantics(depthSemantics)
    if supportsDepth {
        configuration.frameSemantics = depthSemantics
    }

    sceneView.session.run(configuration)
    sceneView.scene.rootNode.addChildNode(parentDebugNodes)

    // Without depth data the markers would never be positioned; skip creating them.
    guard supportsDepth else {
        return
    }

    // All markers share one geometry instance; only their transforms differ.
    let sizeGeomPredictions: CGFloat = 0.005
    let geom = SCNBox(width: sizeGeomPredictions,
                      height: sizeGeomPredictions,
                      length: sizeGeomPredictions,
                      chamferRadius: 0)
    geom.firstMaterial?.diffuse.contents = UIColor.green

    let markerCount = horizontalPoints * verticalPoints
    depthNodes.reserveCapacity(depthNodes.count + markerCount)
    for _ in 0..<markerCount {
        let node = SCNNode(geometry: geom)
        parentDebugNodes.addChildNode(node)
        depthNodes.append(node)
    }
}
/// Per-frame session callback: samples the smoothed depth map on a regular grid,
/// unprojects every sample to a world-space point and moves the matching debug
/// node there.
///
/// Returns early, leaving the nodes untouched, when the frame carries no
/// smoothed depth, the debug nodes have not been created yet, or the depth
/// buffer cannot be locked or read.
///
/// - Parameters:
///   - session: The session that produced the frame (unused).
///   - frame: The frame whose depth map, captured image and camera are read.
func session(_ session: ARSession, didUpdate frame: ARFrame) {
    guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
        return
    }
    // The loop below indexes `depthNodes` directly, so make sure `setup()` filled it.
    guard depthNodes.count >= horizontalPoints * verticalPoints else {
        return
    }
    let capturedImage = frame.capturedImage

    let lockFlags = CVPixelBufferLockFlags.readOnly
    guard CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags) == kCVReturnSuccess else {
        return
    }
    defer {
        CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
    }

    // The depth map is a single-plane buffer, so use the non-planar accessor
    // and bail out instead of crashing if no address is available.
    guard let baseAddress = CVPixelBufferGetBaseAddress(smoothedDepth) else {
        return
    }
    // Assumes the depth map stores one Float32 per pixel.
    let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)

    // The `.size` accessor reads the CVPixelBuffer's width and height in pixels.
    // Per the original author's notes, the captured image (e.g. 1920 x 1440) and
    // the depth map (e.g. 256 x 192) share the same aspect ratio.
    let depthMapSize = smoothedDepth.size
    let capturedImageSize = capturedImage.size
    guard depthMapSize.x > 0, depthMapSize.y > 0 else {
        return
    }

    // Rows may be padded, so index by the real row stride (in Float32 elements)
    // rather than by the pixel width. The value is passed to `sampleDepthRaw` as
    // the "width" it multiplies the row index by.
    let depthRowStride = CVPixelBufferGetBytesPerRow(smoothedDepth) / MemoryLayout<Float32>.stride
    let depthSampleLayout = SIMD2<Int>(max(depthRowStride, depthMapSize.x), depthMapSize.y)

    var cameraIntrinsics = frame.camera.intrinsics
    let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
    let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
                               y: Float(capturedImageSize.y) / depthResolution.y)
    // Make the camera intrinsics be with respect to the depth map resolution.
    cameraIntrinsics[0][0] /= scaleRes.x
    cameraIntrinsics[1][1] /= scaleRes.y
    cameraIntrinsics[2][0] /= scaleRes.x
    cameraIntrinsics[2][1] /= scaleRes.y

    // This will be the long side, because of the rotation.
    let horizontalStep = Float(depthMapSize.x) / Float(horizontalPoints)
    let halfHorizontalStep = horizontalStep / 2
    // This will be the short side, because of the rotation.
    let verticalStep = Float(depthMapSize.y) / Float(verticalPoints)
    let halfVerticalStep = verticalStep / 2

    // This is crucial: always use the view matrix for Landscape Right.
    // It is identical for every sample of the frame, so invert it once here
    // instead of once per sample.
    let viewMatrixInverted = frame.camera.viewMatrix(for: .landscapeRight).inverse

    for h in 0..<horizontalPoints {
        let x = Float(h) * horizontalStep + halfHorizontalStep
        for v in 0..<verticalPoints {
            let y = Float(v) * verticalStep + halfVerticalStep
            let depthMapPoint = simd_float2(x, y)

            // Sample depth at the centre of the grid cell.
            let metricDepth = sampleDepthRaw(depthByteBuffer,
                                             size: depthSampleLayout,
                                             at: .init(depthMapPoint))
            let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
                                depth: metricDepth,
                                cameraIntrinsics: cameraIntrinsics,
                                viewMatrixInverted: viewMatrixInverted)

            depthNodes[v * horizontalPoints + h].simdWorldPosition = wp
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Reads one depth value from a row-major `Float32` buffer.
///
/// Coordinates outside the buffer are clamped to the nearest border pixel, so
/// the function never reads outside `size.x * size.y` elements.
///
/// - Parameters:
///   - pointer: Start of the buffer; must hold at least `size.x * size.y` values.
///   - size: Buffer layout as `(x: elements per row, y: number of rows)`.
///   - at: Pixel coordinate as `(x: column, y: row)`.
/// - Returns: The value stored at the (clamped) coordinate, or `0` when `size`
///   describes an empty buffer.
func sampleDepthRaw(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at: SIMD2<Int>) -> Float {
    guard size.x > 0, size.y > 0 else {
        return 0
    }
    let column = min(max(at.x, 0), size.x - 1)
    let row = min(max(at.y, 0), size.y - 1)
    return Float(pointer[row * size.x + column])
}
// This also works. Adapted from: | |
// https://developer.apple.com/forums/thread/676368 | |
/// Unprojects a depth-map pixel to a world-space point using the inverse of the
/// camera intrinsics.
///
/// - Parameters:
///   - depthMapPixelPoint: Pixel coordinate in the depth map.
///   - depth: Depth sampled at that pixel.
///   - cameraIntrinsicsInverted: Inverse of the intrinsics matrix that matches
///     the depth map resolution.
///   - viewMatrixInverted: Inverse view matrix applied to the camera-space point.
/// - Returns: The transformed point after dividing by its homogeneous `w`.
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsicsInverted: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Lift the pixel to homogeneous coordinates, back-project it through the
    // inverted intrinsics and scale the result by the negated depth.
    let homogeneousPixel = simd_float3(depthMapPixelPoint, 1)
    let scaledRay = cameraIntrinsicsInverted * homogeneousPixel * -depth

    // The x component is mirrored before the point is taken to world space.
    let cameraSpacePoint = simd_float4(-scaledRay.x, scaledRay.y, scaledRay.z, 1)

    let homogeneousWorld = viewMatrixInverted * cameraSpacePoint
    let normalized = homogeneousWorld / homogeneousWorld.w
    return SIMD3<Float>(normalized.x, normalized.y, normalized.z)
}
// This one is adapted from: | |
// http://nicolas.burrus.name/index.php/Research/KinectCalibration | |
/// Unprojects a depth-map pixel to a world-space point using the camera
/// intrinsics directly (no matrix inversion needed).
///
/// - Parameters:
///   - depthMapPixelPoint: Pixel coordinate in the depth map.
///   - depth: Depth sampled at that pixel.
///   - cameraIntrinsics: Intrinsics matrix matching the depth map resolution;
///     `[0][0]` and `[1][1]` are used as divisors, `[2][0]` and `[2][1]` as the
///     offsets subtracted from the pixel coordinate.
///   - viewMatrixInverted: Inverse view matrix applied to the camera-space point.
/// - Returns: The x, y and z components of the transformed point.
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsics: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    let focalLength = SIMD2<Float>(cameraIntrinsics[0][0], cameraIntrinsics[1][1])
    let principalPoint = SIMD2<Float>(cameraIntrinsics[2][0], cameraIntrinsics[2][1])

    // Per component: (pixel - offset) * depth / focal length.
    let cameraXY = (depthMapPixelPoint - principalPoint) * depth / focalLength

    // Y is UP in camera space, whereas it is DOWN in image space.
    let cameraSpacePoint = simd_float4(cameraXY.x, -cameraXY.y, -depth, 1)

    let transformed = viewMatrixInverted * cameraSpacePoint
    return SIMD3<Float>(transformed.x, transformed.y, transformed.z)
}
extension CVPixelBuffer {
    /// Pixel dimensions of the buffer as `(x: width, y: height)`.
    ///
    /// Planar buffers report the dimensions of plane 0. Non-planar buffers use
    /// the whole-buffer accessors, because the per-plane accessors are only
    /// specified for planar buffers.
    var size: SIMD2<Int> {
        if CVPixelBufferIsPlanar(self) {
            return SIMD2(x: CVPixelBufferGetWidthOfPlane(self, 0),
                         y: CVPixelBufferGetHeightOfPlane(self, 0))
        }
        return SIMD2(x: CVPixelBufferGetWidth(self),
                     y: CVPixelBufferGetHeight(self))
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It looks great, and thanks for posting the rest of your code!