Created
June 28, 2021 11:20
-
-
Save edubart/5f95e87d365fd9f9ebe581a1955c2642 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##[[ | |
if GLSL then | |
primtypes.number = primtypes.float32 | |
primtypes.integer = primtypes.cint | |
primtypes.uinteger = primtypes.cuint | |
else | |
primtypes.number = primtypes.float32 | |
primtypes.integer = primtypes.int32 | |
primtypes.uinteger = primtypes.uint32 | |
end | |
]] | |
global float = @number | |
global int = @integer | |
global uint = @uinteger | |
local is_scalar = #[concept(function(x) return x.type.is_scalar end)]# | |
require 'math' | |
function math.rsqrt(x: is_scalar) <inline,nosideeffect> | |
return 1.0 / sqrt(x) | |
end | |
-- Returns 0 if x < edge, and 1 otherwise. | |
function math.step(edge: is_scalar, x: is_scalar) <inline,nosideeffect> | |
x = x < edge and 0 or 1 | |
return x | |
end | |
function math.fastexp(x: float32) <inline,nosideeffect> | |
-- Fast exp (imprecise) | |
-- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c | |
local u: union {f: float32, x: int32} = {x = (@int32)(12102203 * x) + 1064866805} | |
return u.f | |
end | |
function math.fastpow(a: float32, b: float32): float32 <inline,nosideeffect> | |
-- Fast pow (precise) | |
-- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c | |
local flip: boolean = b < 0 | |
b = flip and -b or b | |
local e: int32 = (@int32)(b) | |
local u: union{f: float32, x: int32} = {f = a} | |
u.x = (@int32)((b - e)*(u.x - 1065353216) + 1065353216) | |
local r: float32 = 1.0 | |
## for i=1,32 do | |
if e == 0 then goto finish end | |
r = e & 1 ~= 0 and r*a or r | |
a = a * a | |
e = e >>> 1 | |
## end | |
::finish:: | |
r = r * u.f | |
return flip and 1.0/r or r | |
end | |
function math.fasterpow(a: float32, b: float32): float32 <inline,nosideeffect> | |
-- Very fast pow (imprecise) | |
-- Reference: https://github.com/ekmett/approximate/blob/master/cbits/fast.c | |
local u: union {f: float32, x: int32} = {f = a} | |
u.x = (@int32)(b * (u.x - 1064866805) + 1064866805) | |
return u.f | |
end | |
function math.fastcbrt(a: float32): float32 <inline,nosideeffect> | |
-- Cube root approximation using 1 iterations of Halley's method | |
if unlikely(a == 0.0) then return 0.0 end | |
local u: union{f: float32, x: uint32} = {f = a} | |
u.x = u.x /// 3_u32 + 709921077_u32 | |
local x: float32 = u.f -- initial guess | |
local x3: float32 = x*x*x | |
x = u.f * (x3 + 2.0*a) / (2.0*x3 + a) | |
x3 = x*x*x | |
return x * (x3 + 2.0*a) / (2.0*x3 + a) | |
end | |
function math.fastercbrt(a: float32): float32 <inline,nosideeffect> | |
if unlikely(a == 0.0) then return 0.0 end | |
-- Cube root approximation using 2 iterations of Halley's method | |
local u: union{f: float32, x: uint32} = {f = a} | |
u.x = u.x /// 3_u32 + 709921077_u32 | |
local x: float32 = u.f -- initial guess | |
local x3: float32 = x*x*x | |
return x * (x3 + 2.0*a) / (2.0*x3 + a) | |
end | |
-- Convert float bit representation to an unsigned integer. | |
function math.floatbits2uint(x: float): uint <inline,nosideeffect> | |
local u: union{f: float, i: uint} = {f = x} | |
return u.i | |
end | |
-- Convert float bit representation to an integer. | |
function math.floatbits2int(x: float): int <inline,nosideeffect> | |
local u: union{f: float, i: int} = {f = x} | |
return u.i | |
end | |
## end | |
-- Returns -1 if x < 0, and 1 otherwise. | |
function math.signb(x: is_scalar) <inline,nosideeffect> | |
return x < 0 and -1 or 1 | |
end | |
function math.floorround(x: is_scalar) <inline,nosideeffect> | |
return math.floor(x + 0.5) | |
end | |
-- Returns cos(acos(x)/3.0) but optimized, usually used to solve cubic equations. | |
-- Reference: https://iquilezles.org/www/articles/trisect/trisect.htm | |
function math.trisect(x: float): float <inline,nosideeffect> | |
x = math.sqrt(0.5+0.5*x) | |
return x*(x*(x*(x*-0.008972+0.039071)-0.107074)+0.576975)+0.5 | |
end | |
-- Like `trisect`, but faster losing precision. | |
-- Reference: https://www.shadertoy.com/view/WltSD7 | |
function math.fasttrisect(x: float): float <inline,nosideeffect> | |
x = math.sqrt(0.5+0.5*x) | |
return x*(-0.064916*x+0.564916)+0.5 | |
end | |
-- Returns arccosine of an scalar. | |
-- Handbook of Mathematical Functions - M. Abramowitz and I.A. Stegun, Ed. | |
-- Reference: https://developer.download.nvidia.com/cg/acos.html | |
function math.fastacos(x: float): float <inline,nosideeffect> | |
local negate: float = x < 0.0 and 1.0 or 0.0 | |
x = math.abs(x) | |
local ret: float = -0.0187293 | |
ret = ret * x | |
ret = ret + 0.0742610 | |
ret = ret * x | |
ret = ret - 0.2121144 | |
ret = ret * x | |
ret = ret + 1.5707288 | |
ret = ret * math.sqrt(1.0-x) | |
ret = ret - 2.0 * negate * ret | |
return negate * 3.14159265358979 + ret | |
end | |
-- Returns arccosine of an scalar. (imprecise) | |
-- Approximation by Sebastien Lagarde. | |
-- Reference: https://www.shadertoy.com/view/lsjXDc | |
function math.fasteracos(x: float): float <inline,nosideeffect> | |
local y: float = math.abs(math.clamp(x,-1.0,1.0)) | |
local b: float = (-0.168577*y + 1.56723) * math.sqrt(1.0 - y) | |
local t: float = math.sign(x) | |
local a: float = 0.5*3.1415927 | |
return a * (1.0-t) + b * t | |
end | |
-- Reference: https://www.shadertoy.com/view/7d23D1 | |
function math.fastsin(x: float): float <inline,nosideeffect> | |
-- Quartic sin approximation (precise), using the following constrains: | |
-- f(0) = 0, f(pi) = 0, f(pi/2) = 1, f'(0) = 1, f'(pi) = -1, f'(pi/2) = 0 | |
local line = x*#[1/math.pi]# | |
local stair = math.floor(line) | |
local saw = line - stair | |
local wave = saw*(saw*(saw*(saw*#[16 - 4*math.pi]# + #[8*math.pi - 32]#) + #[16 - 5*math.pi]#) + #[math.pi]#) | |
local signal = (1.0-2.0*(stair - 2.0*math.floor(0.5*line))) | |
return signal*wave | |
end | |
function math.fastcos(x: float): float <inline,nosideeffect> | |
return math.fastsin(x + #[math.pi/2]#) | |
end | |
function math.qsin(x: float): float <inline,nosideeffect> | |
-- Quadratic sin approximation (imprecise), using the following constrains: | |
-- f(0) = 0, f(pi) = 0, f(pi/2) = 1 | |
local line = x*#[1/math.pi]# | |
local stair = math.floor(line) | |
local saw = line - stair | |
local wave = 4.0*saw*(1.0-saw) | |
local signal = (1.0-2.0*(stair - 2.0*math.floor(0.5*line))) | |
return signal*wave | |
end | |
function math.qcos(x: float): float <inline,nosideeffect> | |
return math.qsin(x + #[math.pi/2]#) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment