-
-
Save tcantenot/c15a014aad4621a9b3da36e476438909 to your computer and use it in GitHub Desktop.
Depth buffer raymarching for contact shadows, SSGI, SSR, etc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2023 Tomasz Stachowiak | |
// | |
// This contribution is dual licensed under EITHER OF | |
// | |
// Apache License, Version 2.0, (http://www.apache.org/licenses/LICENSE-2.0) | |
// MIT license (http://opensource.org/licenses/MIT) | |
// | |
// at your option. | |
#include "/inc/frame_constants.hlsl" | |
#include "/inc/uv.hlsl" | |
#include "/inc/samplers.hlsl" | |
struct RootFinderInput { | |
/// Distance to the surface of which a root we're trying to find | |
float distance; | |
/// Whether to consider this sample valid for intersection. | |
/// Mostly relevant for allowing the ray marcher to travel behind surfaces, | |
/// as it will mark surfaces it travels under as invalid. | |
bool valid; | |
}; | |
template<typename Point> | |
struct HybridRootFinder { | |
uint linear_steps; | |
uint bisection_steps; | |
bool use_secant; | |
float jitter; | |
float min_t; | |
float max_t; | |
static HybridRootFinder new_with_linear_steps(uint v) { | |
HybridRootFinder res; | |
res.linear_steps = v; | |
res.bisection_steps = 0; | |
res.use_secant = false; | |
res.jitter = 1; | |
res.min_t = 0; | |
res.max_t = 1; | |
return res; | |
} | |
HybridRootFinder with_bisection_steps(uint v) { | |
HybridRootFinder res = this; | |
res.bisection_steps = v; | |
return res; | |
} | |
HybridRootFinder with_use_secant(bool v) { | |
HybridRootFinder res = this; | |
res.use_secant = v; | |
return res; | |
} | |
HybridRootFinder with_jitter(float v) { | |
HybridRootFinder res = this; | |
res.jitter = v; | |
return res; | |
} | |
HybridRootFinder with_min_t(uint v) { | |
HybridRootFinder res = this; | |
res.min_t = v; | |
return res; | |
} | |
HybridRootFinder with_max_t(uint v) { | |
HybridRootFinder res = this; | |
res.max_t = v; | |
return res; | |
} | |
// IntersectionFn: (Point) -> DistanceTy, where `DistanceTy` inherits from `RootFinderInput`. | |
template<typename IntersectionFn, typename DistanceTy> | |
bool find_root(Point start, Point end, IntersectionFn distance_fn, inout float hit_t, out DistanceTy hit_d) { | |
const Point dir = end - start; | |
float min_t = this.min_t; | |
float max_t = this.max_t; | |
DistanceTy min_d = (DistanceTy)0; | |
DistanceTy max_d = (DistanceTy)0; | |
const float step_size = (max_t - min_t) / linear_steps; | |
bool intersected = false; | |
// | |
// Ray march using linear steps | |
if (linear_steps > 0) { | |
const float candidate_t = min_t + step_size * jitter; | |
const Point candidate = start + dir * candidate_t; | |
const DistanceTy candidate_d = distance_fn(candidate); | |
intersected = candidate_d.distance < 0 && candidate_d.valid; | |
if (intersected) { | |
max_t = candidate_t; | |
max_d = candidate_d; | |
// The `[min_t .. max_t]` interval contains an intersection. End the linear search. | |
} else { | |
// No intersection yet. Carry on. | |
min_t = candidate_t; | |
min_d = candidate_d; | |
for (uint step = 1; step < linear_steps; ++step) { | |
const float candidate_t = min_t + step_size; | |
const Point candidate = start + dir * candidate_t; | |
const DistanceTy candidate_d = distance_fn(candidate); | |
intersected = candidate_d.distance < 0 && candidate_d.valid; | |
if (intersected) { | |
max_t = candidate_t; | |
max_d = candidate_d; | |
// The `[min_t .. max_t]` interval contains an intersection. End the linear search. | |
break; | |
} else { | |
// No intersection yet. Carry on. | |
min_t = candidate_t; | |
min_d = candidate_d; | |
} | |
} | |
} | |
} | |
// | |
// Refine the hit using bisection | |
if (intersected) { | |
for (uint step = 0; step < bisection_steps; ++step) { | |
const float mid_t = (min_t + max_t) * 0.5; | |
const Point candidate = start + dir * mid_t; | |
const DistanceTy candidate_d = distance_fn(candidate); | |
if (candidate_d.distance < 0 && candidate_d.valid) { | |
// Intersection at the mid point. Refine the first half. | |
max_t = mid_t; | |
max_d = candidate_d; | |
} else { | |
// No intersection yet at the mid point. Refine the second half. | |
min_t = mid_t; | |
min_d = candidate_d; | |
} | |
} | |
if (this.use_secant) { | |
// Finish with one application of the secant method | |
const float total_d = min_d.distance + -max_d.distance; | |
const float mid_t = lerp(min_t, max_t, min_d.distance / total_d); | |
const Point candidate = start + dir * mid_t; | |
const DistanceTy candidate_d = distance_fn(candidate); | |
// Only accept the result of the secant method if it improves upon the previous result. | |
// | |
// Technically this should be `abs(candidate_d.distance) < min(min_d.distance, -max_d.distance) * frac`, | |
// but this seems sufficient. | |
if (abs(candidate_d.distance) < min_d.distance * 0.9 && candidate_d.valid) { | |
hit_t = mid_t; | |
hit_d = candidate_d; | |
} else { | |
hit_t = max_t; | |
hit_d = max_d; | |
} | |
return true; | |
} else { | |
hit_t = max_t; | |
hit_d = max_d; | |
return true; | |
} | |
} else { | |
// Mark the conservative miss distance. | |
hit_t = min_t; | |
return false; | |
} | |
} | |
}; | |
struct DistanceWithPenetration: RootFinderInput { | |
/// Conservative estimate of depth to which the ray penetrates the marched surface. | |
float penetration; | |
}; | |
struct DepthRaymarchDistanceFn { | |
Texture2D<float> depth_tex; | |
float2 depth_tex_size; | |
bool march_behind_surfaces; | |
float depth_thickness; | |
DistanceWithPenetration operator()(float3 ray_point_cs) { | |
const float2 interp_uv = cs_to_uv(ray_point_cs.xy); | |
const float ray_depth = 1.0 / ray_point_cs.z; | |
// We're using both point-sampled and bilinear-filtered values from the depth buffer. | |
// | |
// That's really stupid but works like magic. For samples taken near the ray origin, | |
// the discrete nature of the depth buffer becomes a problem. It's not a land of continuous surfaces, | |
// but a bunch of stacked duplo bricks. | |
// | |
// Technically we should be taking discrete steps in this duplo land, but then we're at the mercy | |
// of arbitrary quantization of our directions -- and sometimes we'll take a step which would | |
// claim that the ray is occluded -- even though the underlying smooth surface wouldn't occlude it. | |
// | |
// If we instead take linear taps from the depth buffer, we reconstruct the linear surface. | |
// That fixes acne, but introduces false shadowing near object boundaries, as we now pretend | |
// that everything is shrink-wrapped by this continuous 2.5D surface, and our depth thickness | |
// heuristic ends up falling apart. | |
// | |
// The fix is to consider both the smooth and the discrete surfaces, and only claim occlusion | |
// when the ray descends below both. | |
// | |
// The two approaches end up fixing each other's artifacts: | |
// * The false occlusions due to duplo land are rejected because the ray stays above the smooth surface. | |
// * The shrink-wrap surface is no longer continuous, so it's possible for rays to miss it. | |
#if 1 | |
const float linear_depth = 1.0 / depth_tex.SampleLevel(sampler_llc, interp_uv, 0); | |
const float unfiltered_depth = 1.0 / depth_tex.SampleLevel(sampler_nnc, interp_uv, 0); | |
#else | |
// Manual bilinear; in case using a linear sampler on the depth format is not supported. | |
const float4 depth_vals = depth_tex.GatherRed(sampler_nnc, interp_uv, 0); | |
const float2 frac_px = frac(interp_uv * depth_tex_size - 0.5.xx); | |
const float linear_depth = 1.0 / lerp( | |
lerp(depth_vals.w, depth_vals.z, frac_px.x), | |
lerp(depth_vals.x, depth_vals.y, frac_px.x), | |
frac_px.y | |
); | |
const float unfiltered_depth = 1.0 / depth_tex.SampleLevel(sampler_nnc, interp_uv, 0); | |
#endif | |
#if 1 | |
const float max_depth = max(linear_depth, unfiltered_depth); | |
const float min_depth = min(linear_depth, unfiltered_depth); | |
#else | |
// Just for sanity checking. Should not be used. | |
const float max_depth = unfiltered_depth; | |
const float min_depth = unfiltered_depth; | |
#endif | |
const float bias = 0.000002; | |
DistanceWithPenetration res; | |
res.distance = max_depth * (1.0 + bias) - ray_depth; | |
// This will be used at the end of the ray march to potentially discard the hit. | |
res.penetration = ray_depth - min_depth; | |
if (this.march_behind_surfaces) { | |
res.valid = res.penetration < this.depth_thickness; | |
} else { | |
res.valid = true; | |
} | |
return res; | |
} | |
}; | |
struct DepthRayMarchResult { | |
/// True if the raymarch hit something. | |
bool hit; | |
/// Approximate UV of the hit. | |
float2 hit_uv; | |
/// The distance that the hit point penetrates into the hit surface. | |
/// Will normally be non-zero due to limited precision of the ray march. | |
float hit_penetration; | |
/// Ditto, within the range `0..DepthRayMarch::depth_thickness_linear_z` | |
float hit_penetration_frac; | |
/// In case of a hit, the normalized distance to it. | |
/// | |
/// In case of a miss, the furthest the ray managed to travel, which could either be | |
/// exceeding the max range, or getting behind a surface further than the depth thickness. | |
/// | |
/// Range: `0..=1` as a lerp factor over `ray_start_cs..=ray_end_cs`. | |
float hit_t; | |
}; | |
struct DepthRayMarch { | |
/// Number of steps to be taken at regular intervals to find an initial intersection. | |
/// Must not be zero. | |
uint linear_steps; | |
/// Number of steps in a bisection (binary search) to perform once the linear search | |
/// has found an intersection. Helps narrow down the hit, increasing the chance of | |
/// the secant method finding an accurate hit point. | |
/// | |
/// Useful when sampling color, e.g. SSR or SSGI, but pointless for contact shadows. | |
uint bisection_steps; | |
/// Approximate the root position using the secant method -- by solving for line-line | |
/// intersection between the ray approach rate and the surface gradient. | |
/// | |
/// Useful when sampling color, e.g. SSR or SSGI, but pointless for contact shadows. | |
bool use_secant; | |
/// Jitter to apply to the first step of the linear search; 0..=1 range, mapping | |
/// to the extent of a single linear step in the first phase of the search. | |
/// Use 1.0 if you don't want jitter. | |
float jitter; | |
/// Clip space coordinates (w=1) of the ray. | |
float3 ray_start_cs; | |
float3 ray_end_cs; | |
/// Should be used for contact shadows, but not for any color bounce, e.g. SSR. | |
/// | |
/// For SSR etc. this can easily create leaks, but with contact shadows it allows the rays | |
/// to pass over invalid occlusions (due to thickness), and find potentially valid ones ahead. | |
/// | |
/// Note that this will cause the linear search to potentially miss surfaces, | |
/// because when the ray overshoots and ends up penetrating a surface further than | |
/// `depth_thickness_linear_z`, the ray marcher will just carry on. | |
/// | |
/// For this reason, this may require a lot of samples, or high depth thickness, | |
/// so that `depth_thickness_linear_z >= world space ray length / linear_steps`. | |
bool march_behind_surfaces; | |
/// When marching the depth buffer, we only have 2.5D information, and don't know how | |
/// thick surfaces are. We shall assume that the depth buffer fragments are litte squares | |
/// with a constant thickness defined by this parameter. | |
float depth_thickness_linear_z; | |
/// Size of the depth buffer we're marching in, in pixels. | |
float2 depth_tex_size; | |
/// The depth buffer. Must use reverse Z with an infinite far plane. | |
Texture2D<float> depth_tex; | |
static DepthRayMarch new_from_depth( | |
Texture2D<float> depth_tex, | |
float2 depth_tex_size | |
) { | |
DepthRayMarch res = (DepthRayMarch)0; | |
res.jitter = 1; | |
res.linear_steps = 4; | |
res.bisection_steps = 0; | |
res.depth_tex = depth_tex; | |
res.depth_tex_size = depth_tex_size; | |
res.depth_thickness_linear_z = 1; | |
res.march_behind_surfaces = false; | |
return res; | |
} | |
DepthRayMarch with_linear_steps(uint v) { | |
DepthRayMarch res = this; | |
res.linear_steps = v; | |
return res; | |
} | |
DepthRayMarch with_bisection_steps(uint v) { | |
DepthRayMarch res = this; | |
res.bisection_steps = v; | |
return res; | |
} | |
DepthRayMarch with_use_secant(bool v) { | |
DepthRayMarch res = this; | |
res.use_secant = v; | |
return res; | |
} | |
DepthRayMarch with_jitter(float v) { | |
DepthRayMarch res = this; | |
res.jitter = v; | |
return res; | |
} | |
DepthRayMarch with_depth_thickness(float v) { | |
DepthRayMarch res = this; | |
res.depth_thickness_linear_z = v; | |
return res; | |
} | |
DepthRayMarch with_march_behind_surfaces(bool v) { | |
DepthRayMarch res = this; | |
res.march_behind_surfaces = v; | |
return res; | |
} | |
/// March from a clip-space position (w = 1) | |
DepthRayMarch from_cs(float3 v) { | |
DepthRayMarch res = this; | |
res.ray_start_cs = v; | |
return res; | |
} | |
/// March towards a clip-space direction. | |
/// If `infinite` is `true`, then the ray is extended to cover the whole view frustum. | |
/// If `infinite` is `false`, then the ray length is that of the `dir_cs` parameter. | |
// | |
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. | |
DepthRayMarch to_cs_dir_impl(float4 dir_cs, bool infinite) { | |
float4 end_cs = float4(this.ray_start_cs, 1) + dir_cs; | |
end_cs /= end_cs.w; | |
float3 delta_cs = end_cs.xyz - this.ray_start_cs; | |
// Clip the ray to the frustum | |
const float2 dist_to_edge = (sign(delta_cs.xy) - this.ray_start_cs.xy) / delta_cs.xy; | |
const float min_dist_to_edge = min(dist_to_edge.x, dist_to_edge.y); | |
if (infinite) { | |
delta_cs *= min_dist_to_edge; | |
} else { | |
// If unbounded, would make the ray reach the end of the frustum | |
delta_cs *= min(1.0, min_dist_to_edge); | |
} | |
DepthRayMarch res = this; | |
res.ray_end_cs = this.ray_start_cs + delta_cs; | |
return res; | |
} | |
/// March to a clip-space position (w = 1) | |
/// | |
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. | |
DepthRayMarch to_cs(float3 end_cs) { | |
float4 dir = float4(end_cs - this.ray_start_cs, 0) * sign(end_cs.z); | |
return this.to_cs_dir_impl(dir, false); | |
} | |
/// March towards a clip-space direction. Infinite (ray is extended to cover the whole view frustum). | |
/// | |
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. | |
DepthRayMarch to_cs_dir(float4 dir) { | |
return this.to_cs_dir_impl(dir, true); | |
} | |
/// March to a world-space position. | |
/// | |
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. | |
DepthRayMarch to_ws(float3 end) { | |
return this.to_cs(main_view().position_world_to_sample(end)); | |
} | |
/// March towards a world-space direction. Infinite (ray is extended to cover the whole view frustum). | |
/// | |
/// Must be called after `from_cs`, as it will clip the world-space ray to the view frustum. | |
DepthRayMarch to_ws_dir(float3 dir) { | |
return this.to_cs_dir_impl(main_view().direction_world_to_clip(dir), true); | |
} | |
/// Perform the ray march. | |
DepthRayMarchResult march() { | |
DepthRayMarchResult res = (DepthRayMarchResult)0; | |
const float2 ray_start_uv = cs_to_uv(ray_start_cs.xy); | |
const float2 ray_end_uv = cs_to_uv(ray_end_cs.xy); | |
const float2 ray_uv_delta = ray_end_uv - ray_start_uv; | |
const float2 ray_len_px = ray_uv_delta * depth_tex_size; | |
const uint MIN_PX_PER_STEP = 1; | |
const int step_count = min(linear_steps, int(floor(length(ray_len_px) / MIN_PX_PER_STEP))); | |
const float linear_z_to_scaled_linear_z = main_view().rcp_near_plane_distance(); | |
const float depth_thickness = this.depth_thickness_linear_z * linear_z_to_scaled_linear_z; | |
DepthRaymarchDistanceFn distance_fn; | |
distance_fn.depth_tex = depth_tex; | |
distance_fn.depth_tex_size = depth_tex_size; | |
distance_fn.march_behind_surfaces = this.march_behind_surfaces; | |
distance_fn.depth_thickness = depth_thickness; | |
DistanceWithPenetration hit; | |
bool intersected = HybridRootFinder<float3> | |
::new_with_linear_steps(step_count) | |
.with_bisection_steps(this.bisection_steps) | |
.with_use_secant(this.use_secant) | |
.with_jitter(this.jitter) | |
.find_root(ray_start_cs, ray_end_cs, distance_fn, res.hit_t, hit); | |
if (intersected && hit.penetration < depth_thickness && hit.distance < depth_thickness) { | |
res.hit = true; | |
res.hit_uv = lerp(ray_start_uv, ray_end_uv, res.hit_t); | |
res.hit_penetration = hit.penetration / linear_z_to_scaled_linear_z; | |
res.hit_penetration_frac = hit.penetration / depth_thickness; | |
return res; | |
} | |
return res; | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment