PiMaker · October 15, 2023 18:55
diff --git a/RayClouds.shader b/RayClouds.shader
 // Copyright © 2023 github.com/pimaker
 // Available under the terms of the MIT license.

 Shader "_pi_/RayClouds"
 {
    // Material properties; each one is read through the same-named uniform declared in the CGPROGRAM.
    Properties
    {
        // base cloud color; the fragment shader reads only its .rgb
        _BaseTint ("Base Tint", Color) = (1, 1, 1, 0.1)

        // 3D noise volume, sampled twice per density lookup
        _NoiseTex ("3D Noise", 3D) = "white" {}

        // xyz: weights dotted with the noise sample (normalized by their sum in frag);
        // w: amplitude of a time-driven sine that is added to the y weight
        _NoiseWeights ("Noise Weights", Vector) = (0.5, 0.4, 0.1, 0.25)
        // value subtracted from the weighted noise before it counts as density
        _NoiseThreshold ("Noise Threshold", Range(0, 1)) = 0.55
        // overall multiplier on the thresholded density
        [PowerSlider(4)] _NoiseMultiplier ("Noise Multiplier", Range(0, 0.5)) = 0.2
        // multiplier converting sample position into noise texture coordinates
        [PowerSlider(4)] _NoiseScale ("Noise Scale", Range(0, 0.01)) = 0.00005
        // noise offset per unit of network time along x / z
        _MovementSpeedX ("Movement Speed X", Float) = 0.03
        _MovementSpeedZ ("Movement Speed Z", Float) = 0.03

        // fade band widths, multiplied by the box half-extents in frag
        _FadeVertical ("Fade Vertical", Range(0, 1)) = 0.2
        _FadeHorizontal ("Fade Horizontal", Range(0, 1)) = 0.1

        // g parameters passed to the two Henyey-Greenstein evaluations in phase()
        _ForwardScattering ("Forward Scattering", Range(0, 1)) = 0.83
        _BackScattering ("Back Scattering", Range(-1, 0)) = -0.55
        // scales the accumulated brightness term added to the base tint
        [PowerSlider(4)] _BaseBrightness ("Base Brightness", Range(0, 1)) = 0.01
        // multiplier on the blended phase value
        _PhaseFactor ("Phase Factor", Range(0, 1)) = 0.34
        // exponent applied to exp(-density) for the final transmittance
        _AbsorptionInCloud ("Absorption In Cloud", Range(0, 2)) = 0.75
        // extinction factor used for the per-step light transmittance inside raymarch()
        _AbsorptionForLight ("Absorption For Light", Range(0, 2)) = 0.75
        // added to the phase value in proportion to (1 - transmittance)
        _FakeBrightnessAdditive ("Transmittance Additive", Range(0, 1)) = 0.05

        // pushes alpha towards opaque as transmittance drops
        _Thickening ("Alpha Thickening Factor", Range(0, 1)) = 0.15
        // alpha fades in between this scene depth and 1.5x this depth
        _CockpitOcclusionDistance ("Cockpit Occlusion Distance", Float) = 3.0

        // base step count; clamped to [2, 512] in frag before distance-based scaling
        _Steps ("Steps (Quality)", Int) = 128
        //[Toggle(MIP_QUAD_OPTIMIZATION)] _MipQuadOptimization ("Mip Quad Optimization (Experimental)", Float) = 1.0
        // toggles the GRADIENT_LIGHTING shader keyword
        [Toggle(GRADIENT_LIGHTING)] _GradientLighting ("Gradient Lighting (Experimental)", Float) = 1.0
    }
    SubShader
    {
        // drawn in the transparent queue with an offset of -100; projectors are ignored
        Tags { "RenderType"="Transparent" "Queue"="Transparent-100" "IgnoreProjector"="True" }

        // front faces are culled, so the back faces of the mesh are what gets shaded
        Cull Front
        // standard alpha blending using the alpha computed in frag
        Blend SrcAlpha OneMinusSrcAlpha
        ZWrite Off
        // the hardware depth test always passes; frag compares against _CameraDepthTexture itself
        ZTest Always

        Pass
        {
            Tags { "LightMode" = "ForwardBase" }

            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag
            //#pragma multi_compile_instancing

            #pragma target 5.0
            //#pragma shader_feature_local MIP_QUAD_OPTIMIZATION
            #pragma shader_feature_local GRADIENT_LIGHTING

            #include "UnityCG.cginc"
            #include "UnityLightingCommon.cginc"

            #define MIP_QUAD_OPTIMIZATION 1

            // Vertex shader input; only the position is consumed.
            struct appdata
            {
                float4 vertex : POSITION; // multiplied by unity_ObjectToWorld in vert

                UNITY_VERTEX_INPUT_INSTANCE_ID
            };

            // Vertex-to-fragment interpolators.
            struct v2f
            {
                float4 vertex : SV_POSITION;
                // xy: ComputeGrabScreenPos(clip).xy (divided by vertex.w in frag before the depth lookup)
                // z: dot(clip position, CalculateFrustrumCorrection()), used as B in CorrectedLinearEyeDepth
                float3 grabPos : GRABPOS;
                // xyw of ComputeNonStereoScreenPos(clip); frag divides xy by z
                float3 screenPos : SCREENPOS;
                // world position of the vertex minus camera_pos (not normalized)
                float3 ray : RAY;

                // per-object values, not interpolated:
                // scale = scaleFromMatrix(unity_ObjectToWorld) * 0.5, origin = object origin transformed to world space
                nointerpolation float3 scale : SCALE;
                nointerpolation float3 origin : ORIGIN;

                UNITY_VERTEX_OUTPUT_STEREO
                UNITY_VERTEX_INPUT_INSTANCE_ID
            };

            // camera depth texture; sampled in frag to stop the march at scene geometry
            UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);

            // noise volume and its sampler, read with SampleLevel at mip 0 in cloudDensity
            Texture3D<float4> _NoiseTex;
            SamplerState sampler_NoiseTex;

            // adapted from AudioLink
            // time value provided from outside this shader (presumably a global set by an Udon script -- confirm);
            // it drives the noise scrolling and the noise-weight modulation
            extern float _Udon_RayClouds_NetworkTime;
            static float _NetworkTime = _Udon_RayClouds_NetworkTime;

            // translation entries (_m03, _m13, _m23) of unity_CameraToWorld, used as the camera position
            static float3 camera_pos = unity_CameraToWorld._m03_m13_m23;

            // Returns the lengths of the first three rows' xyz parts of m as a per-axis scale.
            // When the determinant of m is negative the x component is returned negated.
            // NOTE(review): rows are measured, not columns -- with a rotated transform these
            // differ; confirm the box is always axis-aligned.
            float3 scaleFromMatrix(float4x4 m)
            {
                float3 axisScale = float3(
                    length(m[0].xyz),
                    length(m[1].xyz),
                    length(m[2].xyz));

                // a mirrored transform is reported by flipping the sign of one axis (x)
                bool mirrored = determinant(m) < 0;
                if (mirrored)
                    axisScale.x = -axisScale.x;

                return axisScale;
            }

            // Returns the translation stored in m.
            // The previous version was declared `float`, so the float3 it built was truncated to
            // its x component, and it read the bottom row (m[3][0..2]). This file multiplies
            // matrices as mul(M, columnVector), which places translation in the fourth column
            // (the same entries as _m03_m13_m23), so that is what is returned here.
            float3 positionFromMatrix(float4x4 m)
            {
                return float3(m[0][3], m[1][3], m[2][3]);
            }

            // from: https://github.com/lukis101/VRCUnityStuffs/blob/master/Shaders/DJL/Overlays/WorldPosOblique.shader
            // Builds a float4 from entries of UNITY_MATRIX_P. vert() dots it with the clip-space
            // position and the result (after the perspective divide) is passed as B to
            // CorrectedLinearEyeDepth. The source this was taken from targets oblique projections.
            inline float4 CalculateFrustrumCorrection()
            {
                float4x4 proj = UNITY_MATRIX_P;
                float x1 = -proj._31/(proj._11*proj._34);
                float x2 = -proj._32/(proj._22*proj._34);
                float w = proj._33/proj._34 + x1*proj._13 + x2*proj._23;
                return float4(x1, x2, 0, w);
            }
            // Converts a depth-texture value z into an eye-space distance:
            // 1 / (z / P._34 + B), where B is the interpolated frustum-correction term.
            inline float CorrectedLinearEyeDepth(float z, float B)
            {
                float inverseDepth = z/UNITY_MATRIX_P._34 + B;
                return 1.0 / inverseDepth;
            }

            // Vertex entry point: projects the vertex and emits the camera-relative ray, the box
            // half-extents and origin, and the screen-space coordinates used for depth lookup.
            v2f vert (appdata v)
            {
                v2f o;

                UNITY_SETUP_INSTANCE_ID(v);
                UNITY_INITIALIZE_OUTPUT(v2f, o);
                UNITY_TRANSFER_INSTANCE_ID(v, o);
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);

                float4 wsPos = mul(unity_ObjectToWorld, v.vertex);
                float4 clipPos = mul(UNITY_MATRIX_VP, wsPos);

                o.vertex = clipPos;
                o.ray = wsPos.xyz - camera_pos;

                // per-object values (declared nointerpolation in v2f)
                o.origin = mul(unity_ObjectToWorld, float4(0, 0, 0, 1)).xyz;
                o.scale = scaleFromMatrix(unity_ObjectToWorld) * 0.5f; // half-scale for boxIntersection extents

                // xy: grab-pass screen position, z: frustum-correction term for the depth conversion
                o.grabPos = float3(ComputeGrabScreenPos(clipPos).xy, dot(clipPos, CalculateFrustrumCorrection()));
                o.screenPos = ComputeNonStereoScreenPos(clipPos).xyw;

                return o;
            }

            // fragment shader inputs
            // (material properties; names match the Properties block)
            uniform half4 _BaseTint;

            // phase function / brightness
            uniform float _ForwardScattering;
            uniform float _BackScattering;
            uniform float _BaseBrightness;
            uniform float _PhaseFactor;

            // noise sampling
            uniform float4 _NoiseWeights;
            uniform float _NoiseThreshold;
            uniform float _NoiseMultiplier;
            uniform float _NoiseScale;
            uniform float _MovementSpeedX;
            uniform float _MovementSpeedZ;

            // edge fades, as fractions of the box half-extents
            uniform float _FadeVertical;
            uniform float _FadeHorizontal;

            // absorption
            uniform float _AbsorptionInCloud;
            uniform float _AbsorptionForLight;
            uniform float _FakeBrightnessAdditive;

            uniform float _Thickening;
            uniform float _CockpitOcclusionDistance;
            uniform uint _Steps;

            // calculated static props
            // (assigned at the top of frag, then read by the helper functions)
            static float3 noiseWeights;   // _NoiseWeights.xyz after time modulation, divided by their sum
            static float2 movementSpeed;  // (_MovementSpeedX, _MovementSpeedZ)

            static float fadeVertical;    // _FadeVertical * scale.y
            static float fadeHorizontal;  // _FadeHorizontal * max(scale.x, scale.z)

            static float steps;           // clamped _Steps, later multiplied by the distance-based factor
            static float3 origin;         // copy of i.origin
            static float3 scale;          // copy of i.scale (box half-extents)

            // -1 or +1 per axis depending on this pixel's position in its 2x2 quad; used by inquad()
            static float2 pixel_quad_dir;

            static float3 lightDir = normalize(_WorldSpaceLightPos0.xyz);
            static float3 lightCol = _LightColor0;
            static float phaseVal;

            // praise bgolus, in-quad communication for 2x2 pixel grids abusing hw derivatives
            // we also need the value from the diagonal neighbor in our quad
            // we can use the derivative of the "other" value acquired before to get that
            //
            // inquad_fns(type) generates, for the given type:
            //   inquad(value, out x, out y, out diag): reconstructs the value held by the horizontal,
            //       vertical and diagonal pixel of the quad as value - derivative * pixel_quad_dir;
            //       pixel_quad_dir must already be set to -1/+1 per axis (frag does this)
            //   inquad_sum / inquad_avg / inquad_max / inquad_min: reduce the four quad values
            // frag keeps its depth discard after every inquad_* call (see the comment there).
            #define inquad_fns(type) \
                void inquad(type value, out type val_other_x, out type val_other_y, out type val_diag) \
                { \
                    type val_dx = ddx_fine(value); \
                    type val_dy = ddy_fine(value); \
                    val_other_x = value - val_dx * pixel_quad_dir.x; \
                    val_other_y = value - val_dy * pixel_quad_dir.y; \
                    type val_other_dx = ddx_fine(val_other_y); \
                    val_diag = val_other_y - val_other_dx * pixel_quad_dir.x; \
                } \
                type inquad_sum(type value) \
                { \
                    type val_other_x, val_other_y, val_diag; \
                    inquad(value, val_other_x, val_other_y, val_diag); \
                    return value + val_other_x + val_other_y + val_diag; \
                } \
                type inquad_avg(type value) \
                { \
                    return inquad_sum(value) * 0.25f; \
                } \
                type inquad_max(type value) \
                { \
                    type val_other_x, val_other_y, val_diag; \
                    inquad(value, val_other_x, val_other_y, val_diag); \
                    return max(value, max(val_other_x, max(val_other_y, val_diag))); \
                } \
                type inquad_min(type value) \
                { \
                    type val_other_x, val_other_y, val_diag; \
                    inquad(value, val_other_x, val_other_y, val_diag); \
                    return min(value, min(val_other_x, min(val_other_y, val_diag))); \
                }
            // instantiate the helpers for scalar and vector floats
            inquad_fns(float)
            inquad_fns(float2)
            inquad_fns(float3)
            inquad_fns(float4)
            #undef inquad_fns

            // from: https://iquilezles.org/articles/intersectors
            // Intersects the ray ro + t*rd with a box centered at the origin whose half-extents
            // are boxSize. Returns (tNear, tFar), or (-1, -1) when the ray misses the box or the
            // box lies entirely behind the ray origin. tNear is negative when ro is inside.
            float2 boxIntersection(in float3 ro, in float3 rd, float3 boxSize)
            {
                float3 invDir = 1.0f/rd;
                float3 originOverDir = invDir*ro;
                float3 extentOverDir = abs(invDir)*boxSize;

                // per-axis slab entry and exit distances
                float3 tEnter = -originOverDir - extentOverDir;
                float3 tExit = -originOverDir + extentOverDir;

                float tN = max(max(tEnter.x, tEnter.y), tEnter.z);
                float tF = min(min(tExit.x, tExit.y), tExit.z);

                bool missed = (tN > tF || tF < 0.0f);
                return missed ? (float2)-1.0f : float2(tN, tF);
            }

            // Henyey-Greenstein
            // adapted from: https://github.com/SebLague/Clouds/blob/master/Assets/Scripts/Clouds/Shaders/Clouds.shader
            // Evaluates (1 - g^2) / (4*pi * (1 + g^2 - 2*g*a)^1.5).
            // a: the value frag passes is a dot product of two normalized vectors; g: lobe parameter.
            float hg(float a, float g)
            {
                float gSq = g*g;
                float numerator = 1.0f-gSq;
                float denominator = 4.0f*UNITY_PI*pow(1.0f+gSq-2.0f*g*a, 1.5f);
                return numerator / denominator;
            }
            // Equal-weight blend of the forward and backward hg() lobes, scaled by _PhaseFactor.
            float phase(float a)
            {
                const float blend = 0.5f;
                float forwardLobe = hg(a,_ForwardScattering);
                float backLobe = hg(a,_BackScattering);
                float hgBlend = forwardLobe * (1-blend) + backLobe * blend;
                return hgBlend*_PhaseFactor;
            }

            // Density at pos (before step weighting).
            //   pos  - sample position; its y is compared against the box half-height scale.y
            //   move - noise offset supplied by the caller
            //   sn   - sine term supplied by the caller; raises the threshold and modulates the result
            //   hori - horizontal fade factors supplied by the caller
            float cloudDensity(float3 pos, float3 move, float sn, float2 hori)
            {
                // two noise lookups at different scales/offsets, combined per channel with max
                float3 uvwA = pos * _NoiseScale + move;
                float3 uvwB = pos * _NoiseScale * 1.8186757f + move * 0.5f + 0.1f;
                float3 sampleA = _NoiseTex.SampleLevel(sampler_NoiseTex, uvwA, 0).xyz;
                float3 sampleB = _NoiseTex.SampleLevel(sampler_NoiseTex, uvwB, 0).xyz;
                float noise = dot(max(sampleA, sampleB), noiseWeights);

                // vertical fade band near the top/bottom, combined with the caller's horizontal fade
                float vert = smoothstep(scale.y, scale.y - fadeVertical, abs(pos.y));
                float fade = min(vert, min(hori.x, hori.y));

                // the threshold is raised inside the vertical fade band where sn is below 0.75
                float towerMult = 1.0f + (1.0f - vert) * saturate(0.75f - sn);
                float threshold = _NoiseThreshold * towerMult;

                float thresholded = saturate(noise - threshold) * (1.0f / (1.0f - threshold));

                float result = thresholded * _NoiseMultiplier;
                result = result * fade * fade * fade; // cubic falloff
                return result * lerp(0.8f, 1.2f, sn);
            }

            /*float lightstep(float3 cur, float stepSize)
            {
                float density = 0.0f;
                const uint steps = 4;

                for (uint i = 0; i < steps; i++)
                {
                    cur += lightDir * stepSize;
                    density += cloudDensity(cur) * stepSize;
                }

                return density;
            }*/

            // x = density, y = brightness
            // Marches from cur along dir in increments of stepSize until dist has been covered.
            // Returns float2(accumulated density, accumulated brightness).
            // The loop body never runs when dist <= 0.
            float2 raymarch(float3 cur, float stepSize, float dist, float3 dir)
            {
                float totalDensity = 0.0f;
                float totalBrightness = 0.0f;

                // offset towards the light used for the second density lookup
                float3 towardsLight = lightDir * stepSize * 2.0f;
                // noise offset driven by the network time
                float3 drift = float3(_NetworkTime * movementSpeed.x, 0, _NetworkTime * movementSpeed.y);

                float travelled = 0;
                [loop]
                while (travelled < dist)
                {
                    // increase importance of near samples
                    float weight = stepSize * (1.0f + exp(-0.005f * travelled));

                    float sn = sin(cur.x * 0.005f + cur.z * 0.005f);
                    // fades with horizontal distance between the sample and the camera
                    float2 hori = smoothstep(scale.xz, scale.xz - fadeHorizontal, length(cur.xz - camera_pos.xz));

                    float here = cloudDensity(cur, drift, sn, hori) * weight;
                    totalDensity += here;

                    float lightTransmittance = exp(-_AbsorptionForLight * totalDensity);
                    #if GRADIENT_LIGHTING
                        float towardsLightDensity = cloudDensity(cur + towardsLight, drift, sn, hori);
                    #else
                        float towardsLightDensity = here*here*here*0.05f;
                    #endif
                    // only positive differences contribute brightness
                    float gradient = here - towardsLightDensity * weight;
                    totalBrightness += lightTransmittance * weight * max(0.0f, gradient) * 6.5f;

                    cur += stepSize * dir;
                    travelled += stepSize;
                }

                return float2(totalDensity, totalBrightness);
            }

            // Fragment entry point. Intersects the view ray with the cloud box, limits the march by
            // the scene depth, ray-marches density/brightness and converts them to a blended color.
            // With MIP_QUAD_OPTIMIZATION the four pixels of a 2x2 quad march interleaved samples of
            // the same averaged ray and share their results through the inquad_* helpers.
            half4 frag (v2f i) : SV_Target
            {
                UNITY_SETUP_INSTANCE_ID(i);
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);

                #if MIP_QUAD_OPTIMIZATION
                    // position of this pixel inside its 2x2 quad:
                    // pixelPos is the pixel coordinate modulo 2, pixelIdx enumerates the quad (0..3),
                    // pixel_quad_dir is -1/+1 per axis and is consumed by inquad()
                    float2 screenSize = _ScreenParams.xy;
                    float2 screenUv = floor(i.screenPos.xy / i.screenPos.z * screenSize);
                    uint2 pixelPos = screenUv % uint2(2, 2);
                    uint pixelIdx = pixelPos.x + pixelPos.y * 2;
                    int2 pixel_quad_pos = int2(pixelPos.xy) % 2;
                    pixel_quad_dir = float2(pixel_quad_pos) * 2.0 - 1.0;
                #endif

                // setup
                // (fills the file-level statics read by cloudDensity/raymarch)
                origin = i.origin;
                scale = i.scale;

                movementSpeed = float2(_MovementSpeedX, _MovementSpeedZ);

                float4 weights = _NoiseWeights;
                weights.y += sin(_NetworkTime * movementSpeed.x) * weights.w; // add some time for good measure (extra noise)
                noiseWeights = weights.xyz / dot(weights.xyz, 1); // normalized

                fadeVertical = _FadeVertical * scale.y;
                fadeHorizontal = _FadeHorizontal * max(scale.x, scale.z);

                steps = _Steps;
                steps = min(512, max(2, steps)); // safety

                // view ray
                float3 rayDir = normalize((i.ray.xyz / i.vertex.w).xyz);
                #if MIP_QUAD_OPTIMIZATION
                    // average over the quad; the result is not re-normalized
                    rayDir = inquad_avg(rayDir);
                #endif

                // ray/box test with the camera position expressed relative to the box origin
                float2 endpoints = boxIntersection(camera_pos - float3(origin.x, origin.y, origin.z), rayDir, scale);
                //if (endpoints.y < 0) discard; // behind the box ???
                // boxIntersection returns (-1, -1) on a miss, so `inside` is also true then;
                // in that case far stays -1, dist becomes negative and raymarch does no steps
                bool inside = endpoints.x < 0;
                if (inside)
                    endpoints.x = _ProjectionParams.y; // inside the box, start at near clip

                // sample depth texture
                float perspectiveDivide = 1.0f / i.vertex.w;
                float2 screenpos = i.grabPos.xy * perspectiveDivide;
                float z = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, screenpos);
                #if !UNITY_REVERSED_Z
                    z = 1 - z;
                #endif
                float depthFromCamera = CorrectedLinearEyeDepth(z, i.grabPos.z * perspectiveDivide);

                #if !MIP_QUAD_OPTIMIZATION
                    // discard if something is in front of cloud border
                    if (depthFromCamera < endpoints.x)
                        discard;
                #endif

                // stop march at objects within clouds
                float near = endpoints.x;
                float far = endpoints.y;
                far = min(far, depthFromCamera);

                float dist = far - near;

                // max view distance
                // vert is 0 when the camera is at/above the box half-height and rises to 1 further
                // inside; the march distance is shortened by up to 40% accordingly
                float maxScale = max(scale.x, scale.z);
                float vert = smoothstep(scale.y, scale.y - fadeVertical * 1.25f, abs(camera_pos.y - origin.y));
                dist *= 1.0f - vert * 0.4f;

                // more steps on longer distances, decently cheap in normal conditions
                // (factor ranges from 1 to 6)
                float stepMult = 1.0f + smoothstep(500.0f, maxScale, dist) * 5.0f;
                steps *= stepMult;

                // step size, determines quality and artifacting, but also base performance
                float stepSize = dist / steps;

                // increase step size for optimization
                float farAway = smoothstep(500.0f, maxScale, near) * 2.25f; // far away
                float screenDistance = length(i.screenPos.xy / i.screenPos.z - 0.5f); // fixed foveation
                stepSize *= 1.0f + max(farAway, screenDistance * 0.75f);

                #if MIP_QUAD_OPTIMIZATION
                    // ray interleaving within mip quad
                    // each quad pixel starts pixelIdx steps further along and then takes 4x larger steps
                    float forward = stepSize * pixelIdx;
                    near += forward;
                    dist += forward;
                    stepSize *= 4.0f; // this is why it's faster
                #endif

                // start position: x/z stay in world space, y is made relative to the box origin
                float3 start = camera_pos - float3(0, origin.y, 0) + near * rayDir;
                float2 result = raymarch(start, stepSize, dist, rayDir);
                float density = result.x;
                float brightness = result.y;

                #if MIP_QUAD_OPTIMIZATION
                    // combine pixel results
                    density = inquad_avg(density);
                    brightness = inquad_avg(brightness);

                    // depth discard (must be after any inquad_fns with MIP_QUAD_OPTIMIZATION)
                    if (depthFromCamera < endpoints.x)
                        discard;
                #endif

                // equivalent to exp(-density * _AbsorptionInCloud)
                float transmittance = pow(exp(-density), _AbsorptionInCloud);

                // Phase function makes clouds brighter around sun (with bent normals for more pronounced effect)
                phaseVal = phase(dot(normalize(rayDir + lightDir), lightDir));

                // more brightness when above clouds
                phaseVal += 0.1f * saturate((camera_pos.y - origin.y + scale.y*0.6f) * 0.0025f) * exp(transmittance) + (1.0f - transmittance) * _FakeBrightnessAdditive;

                half4 color;
                // brightness is attenuated for rays that are close to horizontal
                float distCorrection = 0.08f + 0.92f * lerp(saturate(abs(dot(rayDir, float3(0, -1, 0)))), 0.2f, vert);
                // blend from tinted, lit cloud color towards the light color as transmittance + phase grows
                color.rgb = lerp(_BaseTint.rgb + (brightness * 0.01f * _BaseBrightness * distCorrection * (1.0f - transmittance)), lightCol.rgb, saturate(transmittance + phaseVal));

                // calculate alpha to occlude sun and other objects
                color.a = 1.0f - saturate(transmittance - _Thickening * (1 - transmittance));

                // cockpit occlusion
                // alpha is 0 where scene depth is below _CockpitOcclusionDistance and reaches full strength at 1.5x
                color.a *= smoothstep(_CockpitOcclusionDistance, _CockpitOcclusionDistance * 1.5f, depthFromCamera);

                return color;
            }

            ENDCG
        }
    }
 }
	// Copyright © 2023 github.com/pimaker
	// Available under the terms of the MIT license.

	Shader "_pi_/RayClouds"
	{
	// Material properties; each one is read through the same-named uniform declared in the CGPROGRAM.
	Properties
	{
	// base cloud color; the fragment shader reads only its .rgb
	_BaseTint ("Base Tint", Color) = (1, 1, 1, 0.1)

	// 3D noise volume, sampled twice per density lookup
	_NoiseTex ("3D Noise", 3D) = "white" {}

	// xyz: weights dotted with the noise sample (normalized by their sum in frag);
	// w: amplitude of a time-driven sine that is added to the y weight
	_NoiseWeights ("Noise Weights", Vector) = (0.5, 0.4, 0.1, 0.25)
	// value subtracted from the weighted noise before it counts as density
	_NoiseThreshold ("Noise Threshold", Range(0, 1)) = 0.55
	// overall multiplier on the thresholded density
	[PowerSlider(4)] _NoiseMultiplier ("Noise Multiplier", Range(0, 0.5)) = 0.2
	// multiplier converting sample position into noise texture coordinates
	[PowerSlider(4)] _NoiseScale ("Noise Scale", Range(0, 0.01)) = 0.00005
	// noise offset per unit of network time along x / z
	_MovementSpeedX ("Movement Speed X", Float) = 0.03
	_MovementSpeedZ ("Movement Speed Z", Float) = 0.03

	// fade band widths, multiplied by the box half-extents in frag
	_FadeVertical ("Fade Vertical", Range(0, 1)) = 0.2
	_FadeHorizontal ("Fade Horizontal", Range(0, 1)) = 0.1

	// g parameters passed to the two Henyey-Greenstein evaluations in phase()
	_ForwardScattering ("Forward Scattering", Range(0, 1)) = 0.83
	_BackScattering ("Back Scattering", Range(-1, 0)) = -0.55
	// scales the accumulated brightness term added to the base tint
	[PowerSlider(4)] _BaseBrightness ("Base Brightness", Range(0, 1)) = 0.01
	// multiplier on the blended phase value
	_PhaseFactor ("Phase Factor", Range(0, 1)) = 0.34
	// exponent applied to exp(-density) for the final transmittance
	_AbsorptionInCloud ("Absorption In Cloud", Range(0, 2)) = 0.75
	// extinction factor used for the per-step light transmittance inside raymarch()
	_AbsorptionForLight ("Absorption For Light", Range(0, 2)) = 0.75
	// added to the phase value in proportion to (1 - transmittance)
	_FakeBrightnessAdditive ("Transmittance Additive", Range(0, 1)) = 0.05

	// pushes alpha towards opaque as transmittance drops
	_Thickening ("Alpha Thickening Factor", Range(0, 1)) = 0.15
	// alpha fades in between this scene depth and 1.5x this depth
	_CockpitOcclusionDistance ("Cockpit Occlusion Distance", Float) = 3.0

	// base step count; clamped to [2, 512] in frag before distance-based scaling
	_Steps ("Steps (Quality)", Int) = 128
	//[Toggle(MIP_QUAD_OPTIMIZATION)] _MipQuadOptimization ("Mip Quad Optimization (Experimental)", Float) = 1.0
	// toggles the GRADIENT_LIGHTING shader keyword
	[Toggle(GRADIENT_LIGHTING)] _GradientLighting ("Gradient Lighting (Experimental)", Float) = 1.0
	}
	SubShader
	{
	// drawn in the transparent queue with an offset of -100; projectors are ignored
	Tags { "RenderType"="Transparent" "Queue"="Transparent-100" "IgnoreProjector"="True" }

	// front faces are culled, so the back faces of the mesh are what gets shaded
	Cull Front
	// standard alpha blending using the alpha computed in frag
	Blend SrcAlpha OneMinusSrcAlpha
	ZWrite Off
	// the hardware depth test always passes; frag compares against _CameraDepthTexture itself
	ZTest Always

	Pass
	{
	Tags { "LightMode" = "ForwardBase" }

	CGPROGRAM
	#pragma vertex vert
	#pragma fragment frag
	//#pragma multi_compile_instancing

	#pragma target 5.0
	//#pragma shader_feature_local MIP_QUAD_OPTIMIZATION
	#pragma shader_feature_local GRADIENT_LIGHTING

	#include "UnityCG.cginc"
	#include "UnityLightingCommon.cginc"

	#define MIP_QUAD_OPTIMIZATION 1

	// Vertex shader input; only the position is consumed.
	struct appdata
	{
	float4 vertex : POSITION; // multiplied by unity_ObjectToWorld in vert

	UNITY_VERTEX_INPUT_INSTANCE_ID
	};

	// Vertex-to-fragment interpolators.
	struct v2f
	{
	float4 vertex : SV_POSITION;
	// xy: ComputeGrabScreenPos(clip).xy (divided by vertex.w in frag before the depth lookup)
	// z: dot(clip position, CalculateFrustrumCorrection()), used as B in CorrectedLinearEyeDepth
	float3 grabPos : GRABPOS;
	// xyw of ComputeNonStereoScreenPos(clip); frag divides xy by z
	float3 screenPos : SCREENPOS;
	// world position of the vertex minus camera_pos (not normalized)
	float3 ray : RAY;

	// per-object values, not interpolated:
	// scale = scaleFromMatrix(unity_ObjectToWorld) * 0.5, origin = object origin transformed to world space
	nointerpolation float3 scale : SCALE;
	nointerpolation float3 origin : ORIGIN;

	UNITY_VERTEX_OUTPUT_STEREO
	UNITY_VERTEX_INPUT_INSTANCE_ID
	};

	// camera depth texture; sampled in frag to stop the march at scene geometry
	UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);

	// noise volume and its sampler, read with SampleLevel at mip 0 in cloudDensity
	Texture3D<float4> _NoiseTex;
	SamplerState sampler_NoiseTex;

	// adapted from AudioLink
	// time value provided from outside this shader (presumably a global set by an Udon script -- confirm);
	// it drives the noise scrolling and the noise-weight modulation
	extern float _Udon_RayClouds_NetworkTime;
	static float _NetworkTime = _Udon_RayClouds_NetworkTime;

	// translation entries (_m03, _m13, _m23) of unity_CameraToWorld, used as the camera position
	static float3 camera_pos = unity_CameraToWorld._m03_m13_m23;

	// Returns the lengths of the first three rows' xyz parts of m as a per-axis scale.
	// When the determinant of m is negative the x component is returned negated.
	// NOTE(review): rows are measured, not columns -- with a rotated transform these
	// differ; confirm the box is always axis-aligned.
	float3 scaleFromMatrix(float4x4 m)
	{
		float3 axisScale = float3(
			length(m[0].xyz),
			length(m[1].xyz),
			length(m[2].xyz));

		// a mirrored transform is reported by flipping the sign of one axis (x)
		bool mirrored = determinant(m) < 0;
		if (mirrored)
			axisScale.x = -axisScale.x;

		return axisScale;
	}

	// Returns the translation stored in m.
	// The previous version was declared `float`, so the float3 it built was truncated to
	// its x component, and it read the bottom row (m[3][0..2]). This file multiplies
	// matrices as mul(M, columnVector), which places translation in the fourth column
	// (the same entries as _m03_m13_m23), so that is what is returned here.
	float3 positionFromMatrix(float4x4 m)
	{
		return float3(m[0][3], m[1][3], m[2][3]);
	}

	// from: https://github.com/lukis101/VRCUnityStuffs/blob/master/Shaders/DJL/Overlays/WorldPosOblique.shader
	// Builds a float4 from entries of UNITY_MATRIX_P. vert() dots it with the clip-space
	// position and the result (after the perspective divide) is passed as B to
	// CorrectedLinearEyeDepth. The source this was taken from targets oblique projections.
	// Fix: the two products in the w component had lost their `*` operators
	// ("x1UNITY_MATRIX_P._13"), which does not compile.
	inline float4 CalculateFrustrumCorrection()
	{
		float x1 = -UNITY_MATRIX_P._31/(UNITY_MATRIX_P._11*UNITY_MATRIX_P._34);
		float x2 = -UNITY_MATRIX_P._32/(UNITY_MATRIX_P._22*UNITY_MATRIX_P._34);
		return float4(x1, x2, 0, UNITY_MATRIX_P._33/UNITY_MATRIX_P._34 + x1*UNITY_MATRIX_P._13 + x2*UNITY_MATRIX_P._23);
	}
	// Converts a depth-texture value z into an eye-space distance:
	// 1 / (z / P._34 + B), where B is the interpolated frustum-correction term.
	inline float CorrectedLinearEyeDepth(float z, float B)
	{
		float inverseDepth = z/UNITY_MATRIX_P._34 + B;
		return 1.0 / inverseDepth;
	}

	// Vertex entry point: projects the vertex and emits the camera-relative ray, the box
	// half-extents and origin, and the screen-space coordinates used for depth lookup.
	v2f vert (appdata v)
	{
		v2f o;

		UNITY_SETUP_INSTANCE_ID(v);
		UNITY_INITIALIZE_OUTPUT(v2f, o);
		UNITY_TRANSFER_INSTANCE_ID(v, o);
		UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);

		float4 wsPos = mul(unity_ObjectToWorld, v.vertex);
		float4 clipPos = mul(UNITY_MATRIX_VP, wsPos);

		o.vertex = clipPos;
		o.ray = wsPos.xyz - camera_pos;

		// per-object values (declared nointerpolation in v2f)
		o.origin = mul(unity_ObjectToWorld, float4(0, 0, 0, 1)).xyz;
		o.scale = scaleFromMatrix(unity_ObjectToWorld) * 0.5f; // half-scale for boxIntersection extents

		// xy: grab-pass screen position, z: frustum-correction term for the depth conversion
		o.grabPos = float3(ComputeGrabScreenPos(clipPos).xy, dot(clipPos, CalculateFrustrumCorrection()));
		o.screenPos = ComputeNonStereoScreenPos(clipPos).xyw;

		return o;
	}

	// fragment shader inputs
	// (material properties; names match the Properties block)
	uniform half4 _BaseTint;

	// phase function / brightness
	uniform float _ForwardScattering;
	uniform float _BackScattering;
	uniform float _BaseBrightness;
	uniform float _PhaseFactor;

	// noise sampling
	uniform float4 _NoiseWeights;
	uniform float _NoiseThreshold;
	uniform float _NoiseMultiplier;
	uniform float _NoiseScale;
	uniform float _MovementSpeedX;
	uniform float _MovementSpeedZ;

	// edge fades, as fractions of the box half-extents
	uniform float _FadeVertical;
	uniform float _FadeHorizontal;

	// absorption
	uniform float _AbsorptionInCloud;
	uniform float _AbsorptionForLight;
	uniform float _FakeBrightnessAdditive;

	uniform float _Thickening;
	uniform float _CockpitOcclusionDistance;
	uniform uint _Steps;

	// calculated static props
	// (assigned at the top of frag, then read by the helper functions)
	static float3 noiseWeights;   // _NoiseWeights.xyz after time modulation, divided by their sum
	static float2 movementSpeed;  // (_MovementSpeedX, _MovementSpeedZ)

	static float fadeVertical;    // _FadeVertical * scale.y
	static float fadeHorizontal;  // _FadeHorizontal * max(scale.x, scale.z)

	static float steps;           // clamped _Steps, later multiplied by the distance-based factor
	static float3 origin;         // copy of i.origin
	static float3 scale;          // copy of i.scale (box half-extents)

	// -1 or +1 per axis depending on this pixel's position in its 2x2 quad; used by inquad()
	static float2 pixel_quad_dir;

	static float3 lightDir = normalize(_WorldSpaceLightPos0.xyz);
	static float3 lightCol = _LightColor0;
	static float phaseVal;

	// praise bgolus, in-quad communication for 2x2 pixel grids abusing hw derivatives
	// we also need the value from the diagonal neighbor in our quad
	// we can use the derivative of the "other" value acquired before to get that
	//
	// inquad_fns(type) generates, for the given type:
	//   inquad(value, out x, out y, out diag): reconstructs the value held by the horizontal,
	//       vertical and diagonal pixel of the quad as value - derivative * pixel_quad_dir;
	//       pixel_quad_dir must already be set to -1/+1 per axis (frag does this)
	//   inquad_sum / inquad_avg / inquad_max / inquad_min: reduce the four quad values
	// frag keeps its depth discard after every inquad_* call (see the comment there).
	#define inquad_fns(type) \
	void inquad(type value, out type val_other_x, out type val_other_y, out type val_diag) \
	{ \
	type val_dx = ddx_fine(value); \
	type val_dy = ddy_fine(value); \
	val_other_x = value - val_dx * pixel_quad_dir.x; \
	val_other_y = value - val_dy * pixel_quad_dir.y; \
	type val_other_dx = ddx_fine(val_other_y); \
	val_diag = val_other_y - val_other_dx * pixel_quad_dir.x; \
	} \
	type inquad_sum(type value) \
	{ \
	type val_other_x, val_other_y, val_diag; \
	inquad(value, val_other_x, val_other_y, val_diag); \
	return value + val_other_x + val_other_y + val_diag; \
	} \
	type inquad_avg(type value) \
	{ \
	return inquad_sum(value) * 0.25f; \
	} \
	type inquad_max(type value) \
	{ \
	type val_other_x, val_other_y, val_diag; \
	inquad(value, val_other_x, val_other_y, val_diag); \
	return max(value, max(val_other_x, max(val_other_y, val_diag))); \
	} \
	type inquad_min(type value) \
	{ \
	type val_other_x, val_other_y, val_diag; \
	inquad(value, val_other_x, val_other_y, val_diag); \
	return min(value, min(val_other_x, min(val_other_y, val_diag))); \
	}
	// instantiate the helpers for scalar and vector floats
	inquad_fns(float)
	inquad_fns(float2)
	inquad_fns(float3)
	inquad_fns(float4)
	#undef inquad_fns

	// from: https://iquilezles.org/articles/intersectors
	// Intersects the ray ro + t*rd with a box centered at the origin whose half-extents
	// are boxSize. Returns (tNear, tFar), or (-1, -1) when the ray misses the box or the
	// box lies entirely behind the ray origin. tNear is negative when ro is inside.
	// Fix: the logical OR had been escaped as "\|\|", which is not valid HLSL.
	float2 boxIntersection(in float3 ro, in float3 rd, float3 boxSize)
	{
		float3 m = 1.0f/rd;
		float3 n = m*ro;
		float3 k = abs(m)*boxSize;
		// per-axis slab entry and exit distances
		float3 t1 = -n - k;
		float3 t2 = -n + k;
		float tN = max(max(t1.x, t1.y), t1.z);
		float tF = min(min(t2.x, t2.y), t2.z);
		if (tN > tF || tF < 0.0f) return (float2)-1.0f;
		return float2(tN, tF);
	}

	// Henyey-Greenstein
	// adapted from: https://github.com/SebLague/Clouds/blob/master/Assets/Scripts/Clouds/Shaders/Clouds.shader
	// Evaluates (1 - g^2) / (4*pi * (1 + g^2 - 2*g*a)^1.5).
	// a: the value frag passes is a dot product of two normalized vectors; g: lobe parameter.
	// Fix: the multiplication operators in the denominator had been stripped
	// ("4.0fUNITY_PIpow(...2.0fga...)"), which does not compile.
	float hg(float a, float g)
	{
		float g2 = g*g;
		return (1.0f-g2) / (4.0f*UNITY_PI*pow(1.0f+g2-2.0f*g*a, 1.5f));
	}
	// Equal-weight blend of the forward and backward hg() lobes, scaled by _PhaseFactor.
	float phase(float a)
	{
		const float blend = 0.5f;
		float forwardLobe = hg(a,_ForwardScattering);
		float backLobe = hg(a,_BackScattering);
		float hgBlend = forwardLobe * (1-blend) + backLobe * blend;
		return hgBlend*_PhaseFactor;
	}

	// Density at pos (before step weighting).
	//   pos  - sample position; its y is compared against the box half-height scale.y
	//   move - noise offset supplied by the caller
	//   sn   - sine term supplied by the caller; raises the threshold and modulates the result
	//   hori - horizontal fade factors supplied by the caller
	// Fix: the cubic fade had collapsed into the undefined identifier "fadefadefade";
	// it is restored to fade*fade*fade.
	float cloudDensity(float3 pos, float3 move, float sn, float2 hori)
	{
		// two noise lookups at different scales/offsets, combined per channel with max
		float3 raw = _NoiseTex.SampleLevel(sampler_NoiseTex, pos * _NoiseScale + move, 0);
		float3 raw2 = _NoiseTex.SampleLevel(sampler_NoiseTex, pos * _NoiseScale * 1.8186757f + move * 0.5f + 0.1f, 0);
		raw = max(raw, raw2);
		float noise = dot(raw, noiseWeights);

		// vertical fade band near the top/bottom, combined with the caller's horizontal fade
		float vert = smoothstep(scale.y, scale.y - fadeVertical, abs(pos.y));
		float fade = min(vert, min(hori.x, hori.y));

		// the threshold is raised inside the vertical fade band where sn is below 0.75
		float towerMult = 1.0f + (1.0f - vert) * saturate(0.75f - sn);

		float threshold = _NoiseThreshold * towerMult;
		float thresholded = saturate(noise - threshold) * (1.0f / (1.0f - threshold));

		return thresholded * _NoiseMultiplier * fade*fade*fade * lerp(0.8f, 1.2f, sn);
	}

	/*float lightstep(float3 cur, float stepSize)
	{
	float density = 0.0f;
	const uint steps = 4;

	for (uint i = 0; i < steps; i++)
	{
	cur += lightDir * stepSize;
	density += cloudDensity(cur) * stepSize;
	}

	return density;
	}*/

	// x = density, y = brightness
	// Marches from cur along dir in increments of stepSize until dist has been covered.
	// Returns float2(accumulated density, accumulated brightness).
	// The loop body never runs when dist <= 0.
	// Fix: the non-gradient branch had collapsed into the undefined identifier
	// "curDensitycurDensitycurDensity"; it is restored to the cubed density.
	float2 raymarch(float3 cur, float stepSize, float dist, float3 dir)
	{
		float density = 0.0f;
		float brightness = 0.0f;

		// offset towards the light used for the second density lookup
		float3 lightOffset = lightDir * stepSize * 2.0f;
		// noise offset driven by the network time
		float3 move = float3(_NetworkTime * movementSpeed.x, 0, _NetworkTime * movementSpeed.y);

		[loop]
		for (float travelled = 0; travelled < dist; travelled += stepSize)
		{
			// increase importance of near samples
			float stepWeight = stepSize * (1.0f + exp(-0.005f * travelled));

			float sn = sin(cur.x * 0.005f + cur.z * 0.005f);
			// fades with horizontal distance between the sample and the camera
			float2 hori = smoothstep(scale.xz, scale.xz - fadeHorizontal, length(cur.xz - camera_pos.xz));

			float curDensity = cloudDensity(cur, move, sn, hori) * stepWeight;
			density += curDensity;

			float curTransmittance = exp(-_AbsorptionForLight * density);
			#if GRADIENT_LIGHTING
				float gradientDensity = cloudDensity(cur + lightOffset, move, sn, hori);
			#else
				float gradientDensity = curDensity*curDensity*curDensity*0.05f;
			#endif
			// only positive differences contribute brightness
			float gradient = curDensity - gradientDensity * stepWeight;
			brightness += curTransmittance * stepWeight * max(0.0f, gradient) * 6.5f;

			cur += stepSize * dir;
		}

		return float2(density, brightness);
	}

	// Fragment entry point. Intersects the view ray with the cloud box, limits the march by
	// the scene depth, ray-marches density/brightness and converts them to a blended color.
	// With MIP_QUAD_OPTIMIZATION the four pixels of a 2x2 quad march interleaved samples of
	// the same averaged ray and share their results through the inquad_* helpers.
	//
	// Fix: four statements had lost their `*` / `*=` operators and no longer compiled or
	// scaled the intended values: the view-distance shortening of dist, the step-size
	// enlargement, the "above clouds" phase term and the cockpit-occlusion alpha multiply.
	half4 frag (v2f i) : SV_Target
	{
		UNITY_SETUP_INSTANCE_ID(i);
		UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);

		#if MIP_QUAD_OPTIMIZATION
			// position of this pixel inside its 2x2 quad:
			// pixelPos is the pixel coordinate modulo 2, pixelIdx enumerates the quad (0..3),
			// pixel_quad_dir is -1/+1 per axis and is consumed by inquad()
			float2 screenSize = _ScreenParams.xy;
			float2 screenUv = floor(i.screenPos.xy / i.screenPos.z * screenSize);
			uint2 pixelPos = screenUv % uint2(2, 2);
			uint pixelIdx = pixelPos.x + pixelPos.y * 2;
			int2 pixel_quad_pos = int2(pixelPos.xy) % 2;
			pixel_quad_dir = float2(pixel_quad_pos) * 2.0 - 1.0;
		#endif

		// setup (fills the file-level statics read by cloudDensity/raymarch)
		origin = i.origin;
		scale = i.scale;

		movementSpeed = float2(_MovementSpeedX, _MovementSpeedZ);

		float4 weights = _NoiseWeights;
		weights.y += sin(_NetworkTime * movementSpeed.x) * weights.w; // add some time for good measure (extra noise)
		noiseWeights = weights.xyz / dot(weights.xyz, 1); // normalized

		fadeVertical = _FadeVertical * scale.y;
		fadeHorizontal = _FadeHorizontal * max(scale.x, scale.z);

		steps = _Steps;
		steps = min(512, max(2, steps)); // safety

		// view ray
		float3 rayDir = normalize((i.ray.xyz / i.vertex.w).xyz);
		#if MIP_QUAD_OPTIMIZATION
			// average over the quad; the result is not re-normalized
			rayDir = inquad_avg(rayDir);
		#endif

		// ray/box test with the camera position expressed relative to the box origin
		float2 endpoints = boxIntersection(camera_pos - float3(origin.x, origin.y, origin.z), rayDir, scale);
		//if (endpoints.y < 0) discard; // behind the box ???
		// boxIntersection returns (-1, -1) on a miss, so `inside` is also true then;
		// in that case far stays -1, dist becomes negative and raymarch does no steps
		bool inside = endpoints.x < 0;
		if (inside)
			endpoints.x = _ProjectionParams.y; // inside the box, start at near clip

		// sample depth texture
		float perspectiveDivide = 1.0f / i.vertex.w;
		float2 screenpos = i.grabPos.xy * perspectiveDivide;
		float z = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, screenpos);
		#if !UNITY_REVERSED_Z
			z = 1 - z;
		#endif
		float depthFromCamera = CorrectedLinearEyeDepth(z, i.grabPos.z * perspectiveDivide);

		#if !MIP_QUAD_OPTIMIZATION
			// discard if something is in front of cloud border
			if (depthFromCamera < endpoints.x)
				discard;
		#endif

		// stop march at objects within clouds
		float near = endpoints.x;
		float far = endpoints.y;
		far = min(far, depthFromCamera);

		float dist = far - near;

		// max view distance
		// vert is 0 when the camera is at/above the box half-height and rises to 1 further
		// inside; the march distance is shortened by up to 40% accordingly
		float maxScale = max(scale.x, scale.z);
		float vert = smoothstep(scale.y, scale.y - fadeVertical * 1.25f, abs(camera_pos.y - origin.y));
		dist *= 1.0f - vert * 0.4f;

		// more steps on longer distances, decently cheap in normal conditions
		// (factor ranges from 1 to 6)
		float stepMult = 1.0f + smoothstep(500.0f, maxScale, dist) * 5.0f;
		steps *= stepMult;

		// step size, determines quality and artifacting, but also base performance
		float stepSize = dist / steps;

		// increase step size for optimization
		float farAway = smoothstep(500.0f, maxScale, near) * 2.25f; // far away
		float screenDistance = length(i.screenPos.xy / i.screenPos.z - 0.5f); // fixed foveation
		stepSize *= 1.0f + max(farAway, screenDistance * 0.75f);

		#if MIP_QUAD_OPTIMIZATION
			// ray interleaving within mip quad
			// each quad pixel starts pixelIdx steps further along and then takes 4x larger steps
			float forward = stepSize * pixelIdx;
			near += forward;
			dist += forward;
			stepSize *= 4.0f; // this is why it's faster
		#endif

		// start position: x/z stay in world space, y is made relative to the box origin
		float3 start = camera_pos - float3(0, origin.y, 0) + near * rayDir;
		float2 result = raymarch(start, stepSize, dist, rayDir);
		float density = result.x;
		float brightness = result.y;

		#if MIP_QUAD_OPTIMIZATION
			// combine pixel results
			density = inquad_avg(density);
			brightness = inquad_avg(brightness);

			// depth discard (must be after any inquad_fns with MIP_QUAD_OPTIMIZATION)
			if (depthFromCamera < endpoints.x)
				discard;
		#endif

		// equivalent to exp(-density * _AbsorptionInCloud)
		float transmittance = pow(exp(-density), _AbsorptionInCloud);

		// Phase function makes clouds brighter around sun (with bent normals for more pronounced effect)
		phaseVal = phase(dot(normalize(rayDir + lightDir), lightDir));

		// more brightness when above clouds
		phaseVal += 0.1f * saturate((camera_pos.y - origin.y + scale.y*0.6f) * 0.0025f) * exp(transmittance) + (1.0f - transmittance) * _FakeBrightnessAdditive;

		half4 color;
		// brightness is attenuated for rays that are close to horizontal
		float distCorrection = 0.08f + 0.92f * lerp(saturate(abs(dot(rayDir, float3(0, -1, 0)))), 0.2f, vert);
		// blend from tinted, lit cloud color towards the light color as transmittance + phase grows
		color.rgb = lerp(_BaseTint.rgb + (brightness * 0.01f * _BaseBrightness * distCorrection * (1.0f - transmittance)), lightCol.rgb, saturate(transmittance + phaseVal));

		// calculate alpha to occlude sun and other objects
		color.a = 1.0f - saturate(transmittance - _Thickening * (1 - transmittance));

		// cockpit occlusion
		// alpha is 0 where scene depth is below _CockpitOcclusionDistance and reaches full strength at 1.5x;
		// this multiplies the alpha computed above instead of replacing it
		color.a *= smoothstep(_CockpitOcclusionDistance, _CockpitOcclusionDistance * 1.5f, depthFromCamera);

		return color;
	}

	ENDCG
	}
	}
	}