Skip to content

Instantly share code, notes, and snippets.

@PiMaker
Created October 15, 2023 18:55
Show Gist options
  • Save PiMaker/3344dc9957e20d4c8525446b465bc022 to your computer and use it in GitHub Desktop.
Save PiMaker/3344dc9957e20d4c8525446b465bc022 to your computer and use it in GitHub Desktop.
Raymarched clouds with mip-quad communication for rendering performance.
// Copyright © 2023 github.com/pimaker
// Available under the terms of the MIT license.
Shader "_pi_/RayClouds"
{
// Material properties. Each one has a matching declaration inside the CGPROGRAM block below.
Properties
{
// Base cloud color; only .rgb is read by the fragment shader in this file.
_BaseTint ("Base Tint", Color) = (1, 1, 1, 0.1)
// 3D noise volume, sampled twice (at two scales) per density evaluation in cloudDensity.
_NoiseTex ("3D Noise", 3D) = "white" {}
// xyz: per-channel noise weights (normalized by their sum in frag).
// w: amplitude of a time-varying sine that is added to the y weight.
_NoiseWeights ("Noise Weights", Vector) = (0.5, 0.4, 0.1, 0.25)
// Weighted noise below this value (after the "tower" multiplier) contributes no density.
_NoiseThreshold ("Noise Threshold", Range(0, 1)) = 0.55
// Overall density scale applied to the thresholded noise.
[PowerSlider(4)] _NoiseMultiplier ("Noise Multiplier", Range(0, 0.5)) = 0.2
// Multiplies the sample position to get noise texture coordinates.
[PowerSlider(4)] _NoiseScale ("Noise Scale", Range(0, 0.01)) = 0.00005
// Noise scroll speed along X / Z, multiplied by the network time.
_MovementSpeedX ("Movement Speed X", Float) = 0.03
_MovementSpeedZ ("Movement Speed Z", Float) = 0.03
// Fade band sizes as a fraction of the box half-extents (vertical: y, horizontal: max of x and z).
_FadeVertical ("Fade Vertical", Range(0, 1)) = 0.2
_FadeHorizontal ("Fade Horizontal", Range(0, 1)) = 0.1
// The two g parameters of the Henyey-Greenstein lobes blended 50/50 in phase().
_ForwardScattering ("Forward Scattering", Range(0, 1)) = 0.83
_BackScattering ("Back Scattering", Range(-1, 0)) = -0.55
// Scales the raymarched brightness term that is added to the base tint.
[PowerSlider(4)] _BaseBrightness ("Base Brightness", Range(0, 1)) = 0.01
// Scales the blended phase function result.
_PhaseFactor ("Phase Factor", Range(0, 1)) = 0.34
// Exponent applied to exp(-density) for the view-ray transmittance.
_AbsorptionInCloud ("Absorption In Cloud", Range(0, 2)) = 0.75
// Absorption coefficient used for the per-step light transmittance inside raymarch.
_AbsorptionForLight ("Absorption For Light", Range(0, 2)) = 0.75
// Added to the phase value, scaled by (1 - transmittance).
_FakeBrightnessAdditive ("Transmittance Additive", Range(0, 1)) = 0.05
// Pushes the output alpha towards opaque as transmittance drops.
_Thickening ("Alpha Thickening Factor", Range(0, 1)) = 0.15
// Output alpha fades from 0 to 1 between this scene depth and 1.5x this scene depth.
_CockpitOcclusionDistance ("Cockpit Occlusion Distance", Float) = 3.0
// Base raymarch step count; frag clamps it to [2, 512] before applying the distance-based multiplier.
_Steps ("Steps (Quality)", Int) = 128
//[Toggle(MIP_QUAD_OPTIMIZATION)] _MipQuadOptimization ("Mip Quad Optimization (Experimental)", Float) = 1.0
// Selects the GRADIENT_LIGHTING variant: a second density sample towards the light per step.
[Toggle(GRADIENT_LIGHTING)] _GradientLighting ("Gradient Lighting (Experimental)", Float) = 1.0
}
SubShader
{
// Queued 100 slots before the default transparent queue.
Tags { "RenderType"="Transparent" "Queue"="Transparent-100" "IgnoreProjector"="True" }
// Front faces are culled, so only the back faces of the mesh are rasterized.
Cull Front
// Standard alpha blending using the alpha computed in frag.
Blend SrcAlpha OneMinusSrcAlpha
// No hardware depth write/test: frag compares against _CameraDepthTexture itself.
ZWrite Off
ZTest Always
Pass
{
Tags { "LightMode" = "ForwardBase" }
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
//#pragma multi_compile_instancing
// frag uses ddx_fine/ddy_fine (via the inquad helpers), hence shader model 5.0.
#pragma target 5.0
//#pragma shader_feature_local MIP_QUAD_OPTIMIZATION
// Variant keyword driven by the _GradientLighting toggle property.
#pragma shader_feature_local GRADIENT_LIGHTING
#include "UnityCG.cginc"
#include "UnityLightingCommon.cginc"
// The quad optimization is forced on here; its shader_feature pragma above is commented out.
#define MIP_QUAD_OPTIMIZATION 1
// Vertex stage input: only the object-space position is consumed.
struct appdata
{
float4 vertex : POSITION;
UNITY_VERTEX_INPUT_INSTANCE_ID
};
// Vertex-to-fragment interpolators, all filled in by vert().
struct v2f
{
// clip-space position
float4 vertex : SV_POSITION;
// xy: ComputeGrabScreenPos(...).xy (frag divides by vertex.w to get depth-texture UVs)
// z: dot(clip position, CalculateFrustrumCorrection()), the B term for CorrectedLinearEyeDepth
float3 grabPos : GRABPOS;
// xyw of ComputeNonStereoScreenPos(...); frag divides xy by the third component
float3 screenPos : SCREENPOS;
// world-space vector from camera_pos to the vertex
float3 ray : RAY;
// half of scaleFromMatrix(unity_ObjectToWorld); used as box half-extents (not interpolated)
nointerpolation float3 scale : SCALE;
// world-space position of the object's origin (not interpolated)
nointerpolation float3 origin : ORIGIN;
UNITY_VERTEX_OUTPUT_STEREO
UNITY_VERTEX_INPUT_INSTANCE_ID
};
// Scene depth texture, sampled in frag to stop the march at geometry and to discard occluded pixels.
UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);
// 3D noise volume and its sampler, read with SampleLevel (mip 0) in cloudDensity.
Texture3D<float4> _NoiseTex;
SamplerState sampler_NoiseTex;
// adapted from AudioLink
// Time value supplied from outside the shader (presumably a global set by an Udon script -- TODO confirm).
extern float _Udon_RayClouds_NetworkTime;
static float _NetworkTime = _Udon_RayClouds_NetworkTime;
// World-space camera position: the translation column of the camera-to-world matrix.
static float3 camera_pos = unity_CameraToWorld._m03_m13_m23;
// Returns the lengths of the xyz parts of the first three rows of m.
// The x component is negated when the determinant of m is negative.
float3 scaleFromMatrix(float4x4 m)
{
    float3 rowLengths = float3(
        length(m[0].xyz),
        length(m[1].xyz),
        length(m[2].xyz));

    // a negative determinant means one axis has to carry a flipped sign
    if (determinant(m) < 0)
    {
        rowLengths.x = -rowLengths.x;
    }

    return rowLengths;
}
// Returns the translation part of a transform matrix.
// Translation is read from the fourth column (m[0][3], m[1][3], m[2][3]),
// the same layout the rest of this file relies on (e.g. unity_CameraToWorld._m03_m13_m23).
// Previously this was declared to return a single float (dropping y and z) and read
// the fourth row instead of the fourth column.
// Not called anywhere in the visible code.
float3 positionFromMatrix(float4x4 m)
{
    return float3(m[0][3], m[1][3], m[2][3]);
}
// from: https://github.com/lukis101/VRCUnityStuffs/blob/master/Shaders/DJL/Overlays/WorldPosOblique.shader
// Builds a float4 from the projection matrix. vert() dots it with the clip-space
// position; the result (after the perspective divide in frag) is passed as the
// B term of CorrectedLinearEyeDepth.
inline float4 CalculateFrustrumCorrection()
{
    float4x4 proj = UNITY_MATRIX_P;

    float corrX = -proj._31 / (proj._11 * proj._34);
    float corrY = -proj._32 / (proj._22 * proj._34);
    float corrW = proj._33 / proj._34 + corrX * proj._13 + corrY * proj._23;

    return float4(corrX, corrY, 0, corrW);
}
// Converts a raw depth-buffer value z into linear depth.
// B is the per-pixel correction term derived from CalculateFrustrumCorrection().
inline float CorrectedLinearEyeDepth(float z, float B)
{
    float inverseDepth = z / UNITY_MATRIX_P._34 + B;
    return 1.0 / inverseDepth;
}
// Vertex stage: transforms the vertex to clip space and prepares everything the
// fragment stage needs for raymarching (view ray, box extents/origin, screen coordinates).
v2f vert (appdata v)
{
    v2f o;
    UNITY_SETUP_INSTANCE_ID(v);
    UNITY_INITIALIZE_OUTPUT(v2f, o);
    UNITY_TRANSFER_INSTANCE_ID(v, o);
    UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);

    float4 positionWS = mul(unity_ObjectToWorld, v.vertex);
    float4 positionCS = mul(UNITY_MATRIX_VP, positionWS);
    o.vertex = positionCS;

    // per-object data, constant across the mesh
    o.origin = mul(unity_ObjectToWorld, float4(0, 0, 0, 1)).xyz;
    o.scale = scaleFromMatrix(unity_ObjectToWorld) * 0.5f; // half-scale for boxIntersection extents

    // camera -> vertex, normalized later in the fragment stage
    o.ray.xyz = positionWS.xyz - camera_pos.xyz;

    // screen coordinates: xy for the depth texture lookup, z for the depth correction term
    o.grabPos = float3(
        ComputeGrabScreenPos(positionCS).xy,
        dot(positionCS, CalculateFrustrumCorrection()));
    o.screenPos = ComputeNonStereoScreenPos(positionCS).xyw;

    return o;
}
// fragment shader inputs
// (one per entry of the Properties block; values come from the material)
uniform half4 _BaseTint;
uniform float _ForwardScattering;
uniform float _BackScattering;
uniform float _BaseBrightness;
uniform float _PhaseFactor;
uniform float4 _NoiseWeights;
uniform float _NoiseThreshold;
uniform float _NoiseMultiplier;
uniform float _NoiseScale;
uniform float _MovementSpeedX;
uniform float _MovementSpeedZ;
uniform float _FadeVertical;
uniform float _FadeHorizontal;
uniform float _AbsorptionInCloud;
uniform float _AbsorptionForLight;
uniform float _FakeBrightnessAdditive;
uniform float _Thickening;
uniform float _CockpitOcclusionDistance;
uniform uint _Steps;
// calculated static props
// (per-invocation globals; the ones without an initializer are assigned at the top of frag
// and then read by the helper functions)
// normalized xyz noise weights
static float3 noiseWeights;
// (_MovementSpeedX, _MovementSpeedZ)
static float2 movementSpeed;
// fade band sizes: the _Fade* fractions multiplied by the box half-extents
static float fadeVertical;
static float fadeHorizontal;
// clamped and distance-scaled step count
static float steps;
// box origin and half-extents, copied from the v2f input
static float3 origin;
static float3 scale;
// per-axis -1/+1 selector derived from this pixel's position inside its 2x2 quad (used by inquad)
static float2 pixel_quad_dir;
// normalized _WorldSpaceLightPos0.xyz
static float3 lightDir = normalize(_WorldSpaceLightPos0.xyz);
static float3 lightCol = _LightColor0;
// phase/brightness blend factor computed in frag
static float phaseVal;
// praise bgolus, in-quad communication for 2x2 pixel grids abusing hw derivatives
//
// inquad_fns(type) generates, for the given scalar/vector type:
//   inquad(value, out x, out y, out diag) - reconstructs the values held by the other three
//                                           pixels of the 2x2 quad
//   inquad_sum / inquad_avg / inquad_max / inquad_min - reductions over all four pixels
//
// How it works: ddx_fine/ddy_fine give the difference between this pixel's value and its
// horizontal/vertical neighbor's value. pixel_quad_dir holds -1 or +1 per axis depending on
// which side of the quad this pixel is on, so "value - derivative * dir" yields the
// neighbor's value.
// we also need the value from the diagonal neighbor in our quad
// we can use the derivative of the "other" value acquired before to get that
// (i.e. the horizontal derivative of val_other_y is applied to val_other_y itself)
//
// pixel_quad_dir must be set before calling any of these, and frag keeps its depth
// discard after the last inquad call.
#define inquad_fns(type) \
void inquad(type value, out type val_other_x, out type val_other_y, out type val_diag) \
{ \
type val_dx = ddx_fine(value); \
type val_dy = ddy_fine(value); \
val_other_x = value - val_dx * pixel_quad_dir.x; \
val_other_y = value - val_dy * pixel_quad_dir.y; \
type val_other_dx = ddx_fine(val_other_y); \
val_diag = val_other_y - val_other_dx * pixel_quad_dir.x; \
} \
type inquad_sum(type value) \
{ \
type val_other_x, val_other_y, val_diag; \
inquad(value, val_other_x, val_other_y, val_diag); \
return value + val_other_x + val_other_y + val_diag; \
} \
type inquad_avg(type value) \
{ \
return inquad_sum(value) * 0.25f; \
} \
type inquad_max(type value) \
{ \
type val_other_x, val_other_y, val_diag; \
inquad(value, val_other_x, val_other_y, val_diag); \
return max(value, max(val_other_x, max(val_other_y, val_diag))); \
} \
type inquad_min(type value) \
{ \
type val_other_x, val_other_y, val_diag; \
inquad(value, val_other_x, val_other_y, val_diag); \
return min(value, min(val_other_x, min(val_other_y, val_diag))); \
}
// instantiate the helpers for scalars and 2/3/4-component vectors
inquad_fns(float)
inquad_fns(float2)
inquad_fns(float3)
inquad_fns(float4)
#undef inquad_fns
// from: https://iquilezles.org/articles/intersectors
// Ray vs. axis-aligned box centered at the origin.
//   ro      - ray origin relative to the box center
//   rd      - ray direction
//   boxSize - box half-extents
// Returns (tNear, tFar) along the ray, or (-1, -1) when the ray misses the box
// or the box lies entirely behind the ray origin.
float2 boxIntersection(in float3 ro, in float3 rd, float3 boxSize)
{
    float3 invDir = 1.0f/rd;
    float3 originOverDir = invDir*ro;
    float3 extentOverDir = abs(invDir)*boxSize;

    // per-axis slab entry and exit parameters
    float3 slabEnter = -originOverDir - extentOverDir;
    float3 slabExit = -originOverDir + extentOverDir;

    float tN = max(max(slabEnter.x, slabEnter.y), slabEnter.z);
    float tF = min(min(slabExit.x, slabExit.y), slabExit.z);

    bool missed = (tN > tF || tF < 0.0f);
    if (missed)
    {
        return (float2)-1.0f;
    }
    return float2(tN, tF);
}
// Henyey-Greenstein
// adapted from: https://github.com/SebLague/Clouds/blob/master/Assets/Scripts/Clouds/Shaders/Clouds.shader
// Henyey-Greenstein phase function.
//   a - cosine of the scattering angle (phase() receives a dot product of unit vectors)
//   g - anisotropy parameter
float hg(float a, float g)
{
    float gSquared = g*g;
    float numerator = 1.0f-gSquared;
    float denominator = 4.0f*UNITY_PI*pow(1.0f+gSquared-2.0f*g*a, 1.5f);
    return numerator / denominator;
}
// Blends the forward and backward Henyey-Greenstein lobes 50/50 and scales by _PhaseFactor.
//   a - cosine of the angle, forwarded to hg()
float phase(float a)
{
    float backWeight = 0.5f;
    float forwardLobe = hg(a,_ForwardScattering);
    float backLobe = hg(a,_BackScattering);
    float combined = forwardLobe * (1-backWeight) + backLobe * backWeight;
    return combined*_PhaseFactor;
}
// Evaluates the cloud density at one sample point.
//   pos  - sample position (y is relative to the box center, see frag)
//   move - noise scroll offset added to the texture coordinates
//   sn   - slowly varying sine of the horizontal position, used to vary threshold and density
//   hori - horizontal fade factors (x and z), 0 at the box edge
// Returns a density >= 0 that has not yet been multiplied by the step length.
float cloudDensity(float3 pos, float3 move, float sn, float2 hori)
{
    // two noise octaves at different scales/offsets, combined with a per-channel max
    // (explicit .rgb avoids an implicit float4 -> float3 truncation)
    float3 raw = _NoiseTex.SampleLevel(sampler_NoiseTex, pos * _NoiseScale + move, 0).rgb;
    float3 raw2 = _NoiseTex.SampleLevel(sampler_NoiseTex, pos * _NoiseScale * 1.8186757f + move * 0.5f + 0.1f, 0).rgb;
    raw = max(raw, raw2);
    float noise = dot(raw, noiseWeights);

    // vertical fade: 1 inside the box, 0 at the top/bottom faces
    float vert = smoothstep(scale.y, scale.y - fadeVertical, abs(pos.y));
    float fade = min(vert, min(hori.x, hori.y));

    // raise the threshold towards the top/bottom so only "towers" reach into the fade band
    float towerMult = 1.0f + (1.0f - vert) * saturate(0.75f - sn);
    float threshold = _NoiseThreshold * towerMult;

    // remap [threshold, 1] to [0, 1]. The threshold can reach or exceed 1
    // (_NoiseThreshold up to 1, towerMult up to 1.75); clamping the divisor keeps
    // 0 * (1 / 0) from producing a NaN that would poison the accumulated density.
    float remapRange = max(1.0f - threshold, 1e-5f);
    float thresholded = saturate(noise - threshold) * (1.0f / remapRange);

    return thresholded * _NoiseMultiplier * fade*fade*fade * lerp(0.8f, 1.2f, sn);
}
/*float lightstep(float3 cur, float stepSize)
{
float density = 0.0f;
const uint steps = 4;
for (uint i = 0; i < steps; i++)
{
cur += lightDir * stepSize;
density += cloudDensity(cur) * stepSize;
}
return density;
}*/
// x = density, y = brightness
// Marches along a ray and accumulates cloud density and a lighting term.
//   cur      - start position of the march
//   stepSize - distance between samples
//   dist     - total distance to march
//   dir      - march direction
// Returns float2(accumulated density, accumulated brightness).
float2 raymarch(float3 cur, float stepSize, float dist, float3 dir)
{
    float totalDensity = 0.0f;
    float totalBrightness = 0.0f;

    // offset towards the light for the gradient sample
    float3 lightOffset = lightDir * stepSize * 2.0f;
    // noise scroll offset for this frame
    float3 move = float3(_NetworkTime * movementSpeed.x, 0, _NetworkTime * movementSpeed.y);

    float travelled = 0;
    [loop]
    while (travelled < dist)
    {
        // increase importance of near samples
        float stepWeight = stepSize * (1.0f + exp(-0.005f * travelled));

        float sn = sin(cur.x * 0.005f + cur.z * 0.005f);
        // horizontal fade is based on the distance from the camera, not from the box center
        float horizontalDist = length(cur.xz - camera_pos.xz);
        float2 hori = smoothstep(scale.xz, scale.xz - fadeHorizontal, horizontalDist);

        float curDensity = cloudDensity(cur, move, sn, hori) * stepWeight;
        totalDensity += curDensity;

        // light reaching this sample, based on everything accumulated so far
        float curTransmittance = exp(-_AbsorptionForLight * totalDensity);

#if GRADIENT_LIGHTING
        // second sample towards the light
        float gradientDensity = cloudDensity(cur + lightOffset, move, sn, hori);
#else
        // cheap stand-in derived from the local density only
        float gradientDensity = curDensity*curDensity*curDensity*0.05f;
#endif
        float gradient = curDensity - gradientDensity * stepWeight;
        totalBrightness += curTransmittance * stepWeight * max(0.0f, gradient) * 6.5f;

        cur += stepSize * dir;
        travelled += stepSize;
    }

    return float2(totalDensity, totalBrightness);
}
// Fragment stage: raymarches the cloud volume inside the object's box and returns the
// blended cloud color (rgb) and opacity (a).
//
// With MIP_QUAD_OPTIMIZATION the four pixels of each 2x2 quad march the same (averaged)
// ray with a 4x larger step, each offset by a different multiple of the original step,
// and then average their results through the inquad helpers.
half4 frag (v2f i) : SV_Target
{
    UNITY_SETUP_INSTANCE_ID(i);
    UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);

#if MIP_QUAD_OPTIMIZATION
    // position of this pixel inside its 2x2 quad: each component is 0 or 1
    float2 screenSize = _ScreenParams.xy;
    float2 screenUv = floor(i.screenPos.xy / i.screenPos.z * screenSize);
    uint2 pixelPos = screenUv % uint2(2, 2);
    uint pixelIdx = pixelPos.x + pixelPos.y * 2;
    // -1 where the quad coordinate is 0, +1 where it is 1 (consumed by inquad)
    pixel_quad_dir = float2(pixelPos) * 2.0 - 1.0;
#endif

    // setup
    origin = i.origin;
    scale = i.scale;
    movementSpeed = float2(_MovementSpeedX, _MovementSpeedZ);
    float4 weights = _NoiseWeights;
    weights.y += sin(_NetworkTime * movementSpeed.x) * weights.w; // add some time for good measure (extra noise)
    noiseWeights = weights.xyz / dot(weights.xyz, 1); // normalized
    fadeVertical = _FadeVertical * scale.y;
    fadeHorizontal = _FadeHorizontal * max(scale.x, scale.z);
    steps = _Steps;
    steps = min(512, max(2, steps)); // safety

    // view ray
    // i.ray / i.vertex.w is the camera->fragment vector scaled to unit eye depth, so its
    // length converts a linear eye depth into a distance along the (normalized) ray.
    float3 rayPerDepth = i.ray.xyz / i.vertex.w;
    float rayLengthPerDepth = length(rayPerDepth);
    float3 rayDir = rayPerDepth / rayLengthPerDepth;
#if MIP_QUAD_OPTIMIZATION
    // all four pixels march the same ray; re-normalize because the average of
    // unit vectors is slightly shorter than unit length
    rayDir = normalize(inquad_avg(rayDir));
#endif

    float2 endpoints = boxIntersection(camera_pos - float3(origin.x, origin.y, origin.z), rayDir, scale);
    //if (endpoints.y < 0) discard; // behind the box ???
    bool inside = endpoints.x < 0;
    if (inside)
        endpoints.x = _ProjectionParams.y; // inside the box, start at near clip

    // sample depth texture
    float perspectiveDivide = 1.0f / i.vertex.w;
    float2 screenpos = i.grabPos.xy * perspectiveDivide;
    float z = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, screenpos);
#if !UNITY_REVERSED_Z
    z = 1 - z;
#endif
    float depthFromCamera = CorrectedLinearEyeDepth(z, i.grabPos.z * perspectiveDivide);
    // The box endpoints are distances along the ray, while depthFromCamera is linear eye
    // depth; convert so both are in the same unit before comparing them.
    float sceneDistance = depthFromCamera * rayLengthPerDepth;

#if !MIP_QUAD_OPTIMIZATION
    // discard if something is in front of cloud border
    if (sceneDistance < endpoints.x)
        discard;
#endif

    // stop march at objects within clouds
    float near = endpoints.x;
    float far = endpoints.y;
    far = min(far, sceneDistance);
    float dist = far - near;

    // max view distance
    float maxScale = max(scale.x, scale.z);
    float vert = smoothstep(scale.y, scale.y - fadeVertical * 1.25f, abs(camera_pos.y - origin.y));
    dist *= 1.0f - vert * 0.4f;

    // more steps on longer distances, decently cheap in normal conditions
    float stepMult = 1.0f + smoothstep(500.0f, maxScale, dist) * 5.0f;
    steps *= stepMult;

    // step size, determines quality and artifacting, but also base performance
    float stepSize = dist / steps;

    // increase step size for optimization
    float farAway = smoothstep(500.0f, maxScale, near) * 2.25f; // far away
    float screenDistance = length(i.screenPos.xy / i.screenPos.z - 0.5f); // fixed foveation
    stepSize *= 1.0f + max(farAway, screenDistance * 0.75f);

#if MIP_QUAD_OPTIMIZATION
    // ray interleaving within mip quad
    float forward = stepSize * pixelIdx;
    near += forward;
    // the start moved forward, so the remaining distance shrinks by the same amount;
    // otherwise the march would run past the far point (and past scene geometry)
    dist -= forward;
    stepSize *= 4.0f; // this is why it's faster
#endif

    // only the vertical box offset is removed: cloudDensity fades on |y| relative to the box
    float3 start = camera_pos - float3(0, origin.y, 0) + near * rayDir;
    float2 result = raymarch(start, stepSize, dist, rayDir);
    float density = result.x;
    float brightness = result.y;

#if MIP_QUAD_OPTIMIZATION
    // combine pixel results
    density = inquad_avg(density);
    brightness = inquad_avg(brightness);
    // depth discard (must be after any inquad_fns with MIP_QUAD_OPTIMIZATION)
    if (sceneDistance < endpoints.x)
        discard;
#endif

    float transmittance = pow(exp(-density), _AbsorptionInCloud);

    // Phase function makes clouds brighter around sun (with bent normals for more pronounced effect)
    phaseVal = phase(dot(normalize(rayDir + lightDir), lightDir));
    // more brightness when above clouds
    phaseVal += 0.1f * saturate((camera_pos.y - origin.y + scale.y*0.6f) * 0.0025f) * exp(transmittance) + (1.0f - transmittance) * _FakeBrightnessAdditive;

    half4 color;
    float distCorrection = 0.08f + 0.92f * lerp(saturate(abs(dot(rayDir, float3(0, -1, 0)))), 0.2f, vert);
    color.rgb = lerp(_BaseTint.rgb + (brightness * 0.01f * _BaseBrightness * distCorrection * (1.0f - transmittance)), lightCol.rgb, saturate(transmittance + phaseVal));

    // calculate alpha to occlude sun and other objects
    color.a = 1.0f - saturate(transmittance - _Thickening * (1 - transmittance));

    // cockpit occlusion (kept on linear eye depth, which is what _CockpitOcclusionDistance was tuned against)
    color.a *= smoothstep(_CockpitOcclusionDistance, _CockpitOcclusionDistance * 1.5f, depthFromCamera);

    return color;
}
ENDCG
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment