Skip to content

Instantly share code, notes, and snippets.

@snowzurfer
Last active July 12, 2024 07:39
Show Gist options
  • Save snowzurfer/1e90678d0d23d3295dda9a0cc93b2453 to your computer and use it in GitHub Desktop.
Save snowzurfer/1e90678d0d23d3295dda9a0cc93b2453 to your computer and use it in GitHub Desktop.
3D world points from ARKit depth
import ARKit
import SceneKit
/// Number of sample columns taken across the depth map's width each frame.
let horizontalPoints: Int = 256 / 2
/// Number of sample rows taken across the depth map's height each frame.
let verticalPoints: Int = 192 / 2
/// One debug node per sample; indexed as `v * horizontalPoints + h` by the frame callback.
var depthNodes: [SCNNode] = []
/// Container node that holds every debug node.
var parentDebugNodes: SCNNode = SCNNode()
/// The AR view whose session and scene are configured in `setup()`.
var sceneView: ARSCNView!
// Somewhere during setup
/// Starts world tracking with smoothed scene depth and creates one small green
/// box node per depth sample, all parented under `parentDebugNodes`.
func setup() {
    // Run the session with smoothed depth enabled.
    let trackingConfiguration = ARWorldTrackingConfiguration()
    trackingConfiguration.frameSemantics = .smoothedSceneDepth
    sceneView.session.run(trackingConfiguration)

    sceneView.scene.rootNode.addChildNode(parentDebugNodes)

    // A single geometry instance is shared by every marker node.
    let markerEdge = 0.005
    let markerGeometry = SCNBox(width: markerEdge,
                                height: markerEdge,
                                length: markerEdge,
                                chamferRadius: 0)
    markerGeometry.firstMaterial?.diffuse.contents = UIColor.green

    let markerCount = horizontalPoints * verticalPoints
    (0..<markerCount).forEach { _ in
        let marker = SCNNode(geometry: markerGeometry)
        self.parentDebugNodes.addChildNode(marker)
        self.depthNodes.append(marker)
    }
}
/// Per-frame session callback: samples the smoothed depth map on a
/// `horizontalPoints` x `verticalPoints` grid, unprojects every sample into world
/// space and moves the matching debug node in `depthNodes` to that position.
///
/// Returns early, leaving the nodes untouched, when the frame carries no smoothed
/// depth, the depth buffer cannot be locked/read, or the node grid is not populated.
///
/// - Parameters:
///   - session: The session that produced the frame (unused).
///   - frame: The frame providing the depth map, captured image and camera.
func session(_ session: ARSession, didUpdate frame: ARFrame) {
    guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
        return
    }
    // The loops below subscript `depthNodes` directly; bail out instead of crashing
    // if the full grid of nodes has not been created yet.
    guard depthNodes.count >= horizontalPoints * verticalPoints else {
        return
    }
    let capturedImage = frame.capturedImage

    // The buffer must stay locked for as long as we read through its base address.
    let lockFlags = CVPixelBufferLockFlags.readOnly
    guard CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags) == kCVReturnSuccess else {
        return
    }
    defer {
        CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
    }
    guard let baseAddress = CVPixelBufferGetBaseAddressOfPlane(smoothedDepth, 0) else {
        return
    }
    let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)

    // The `.size` accessor simply reads the CVPixelBuffer's width and height in pixels.
    //
    // The depth map and the captured image are expected to share the same aspect
    // ratio (e.g. 256 x 192 for depth and 1920 x 1440 for the image, both 0.75).
    let depthMapSize = smoothedDepth.size
    let capturedImageSize = capturedImage.size
    guard depthMapSize.x > 0, depthMapSize.y > 0 else {
        return
    }

    var cameraIntrinsics = frame.camera.intrinsics
    let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
    let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
                               y: Float(capturedImageSize.y) / depthResolution.y)
    // Make the camera intrinsics be with respect to Depth: focal lengths and the
    // principal point are scaled down from captured-image pixels to depth-map pixels.
    cameraIntrinsics[0][0] /= scaleRes.x
    cameraIntrinsics[1][1] /= scaleRes.y
    cameraIntrinsics[2][0] /= scaleRes.x
    cameraIntrinsics[2][1] /= scaleRes.y

    // This is crucial: you need to always use the view matrix for Landscape Right.
    // It does not change within a frame, so invert it once rather than per sample.
    let viewMatrixInverted = frame.camera.viewMatrix(for: .landscapeRight).inverse

    // This will be the long size, because of the rotation
    let horizontalStep = Float(depthMapSize.x) / Float(horizontalPoints)
    let halfHorizontalStep = horizontalStep / 2
    // This will be the short size, because of the rotation
    let verticalStep = Float(depthMapSize.y) / Float(verticalPoints)
    let halfVerticalStep = verticalStep / 2

    for h in 0..<horizontalPoints {
        for v in 0..<verticalPoints {
            // Sample at the centre of each grid cell.
            let x = Float(h) * horizontalStep + halfHorizontalStep
            let y = Float(v) * verticalStep + halfVerticalStep
            let depthMapPoint = simd_float2(x, y)

            // Sample depth
            // NOTE(review): the sampler indexes rows as `size.x` floats, i.e. it assumes
            // the buffer has no row padding — confirm against the buffer's bytes-per-row.
            let metricDepth = sampleDepthRaw(depthByteBuffer, size: depthMapSize, at: .init(depthMapPoint))

            let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
                                depth: metricDepth,
                                cameraIntrinsics: cameraIntrinsics,
                                viewMatrixInverted: viewMatrixInverted)

            let node = depthNodes[v * horizontalPoints + h]
            node.simdWorldPosition = wp
        }
    }
}
/// Reads a single depth value from a row-major `Float32` buffer.
///
/// - Parameters:
///   - pointer: Start of the depth buffer; rows are addressed as `size.x` consecutive floats.
///   - size: Buffer dimensions in pixels (`x` = width / row length, `y` = height).
///   - at: Pixel coordinate to sample. Coordinates outside the buffer are clamped to
///     the nearest edge pixel so the read can never land outside the buffer.
/// - Returns: The stored value at the (clamped) coordinate, or `0` when `size` has a
///   non-positive dimension and there is nothing to read.
func sampleDepthRaw(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at: SIMD2<Int>) -> Float {
    guard size.x > 0, size.y > 0 else {
        return 0
    }
    let column = min(max(at.x, 0), size.x - 1)
    let row = min(max(at.y, 0), size.y - 1)
    return pointer[row * size.x + column]
}
// This also works. Adapted from:
// https://developer.apple.com/forums/thread/676368
/// Unprojects a depth-map pixel into world space using the inverted intrinsics.
///
/// - Parameters:
///   - depthMapPixelPoint: Pixel coordinate in the depth map.
///   - depth: Depth value sampled at that pixel.
///   - cameraIntrinsicsInverted: Inverse of the intrinsics matrix expressed in depth-map pixels.
///   - viewMatrixInverted: Inverse of the camera view matrix.
/// - Returns: The transformed point after dividing by its homogeneous `w`.
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsicsInverted: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Back-project the homogeneous pixel and scale it by the negated depth.
    let homogeneousPixel = simd_float3(depthMapPixelPoint, 1)
    let scaledRay = (cameraIntrinsicsInverted * homogeneousPixel) * -depth
    // Flip X before leaving camera space.
    let cameraSpacePoint = simd_float4(simd_float3(-scaledRay.x, scaledRay.y, scaledRay.z), 1)
    let homogeneousWorld = viewMatrixInverted * cameraSpacePoint
    let normalized = homogeneousWorld / homogeneousWorld.w
    return SIMD3<Float>(normalized.x, normalized.y, normalized.z)
}
// This one is adapted from:
// http://nicolas.burrus.name/index.php/Research/KinectCalibration
/// Unprojects a depth-map pixel into world space using the pinhole intrinsics directly.
///
/// - Parameters:
///   - depthMapPixelPoint: Pixel coordinate in the depth map.
///   - depth: Depth value sampled at that pixel.
///   - cameraIntrinsics: Intrinsics matrix expressed in depth-map pixels.
///   - viewMatrixInverted: Inverse of the camera view matrix.
/// - Returns: The x, y, z components of the transformed point.
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsics: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Focal lengths sit on the diagonal; the principal point is in the third column.
    let focalX = cameraIntrinsics[0][0]
    let focalY = cameraIntrinsics[1][1]
    let principalX = cameraIntrinsics[2][0]
    let principalY = cameraIntrinsics[2][1]

    let cameraX = (depthMapPixelPoint.x - principalX) * depth / focalX
    let cameraY = (depthMapPixelPoint.y - principalY) * depth / focalY

    // Y is UP in camera space, vs it being DOWN in image space.
    let cameraSpacePoint = simd_float4(simd_float3(cameraX, -cameraY, -depth), 1)
    let transformed = viewMatrixInverted * cameraSpacePoint
    return simd_float3(transformed.x, transformed.y, transformed.z)
}
extension CVPixelBuffer {
    /// Width (`x`) and height (`y`) of plane 0 of this pixel buffer, in pixels.
    var size: SIMD2<Int> {
        SIMD2<Int>(x: CVPixelBufferGetWidthOfPlane(self, 0),
                   y: CVPixelBufferGetHeightOfPlane(self, 0))
    }
}
@snowzurfer
Copy link
Author

It looks great, and thanks for posting the rest of your code!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment