Skip to content

Instantly share code, notes, and snippets.

@mildsunrise
Last active August 25, 2025 10:49
Show Gist options
  • Save mildsunrise/d5990a0651009b8c96460c53a3493e03 to your computer and use it in GitHub Desktop.
Save mildsunrise/d5990a0651009b8c96460c53a3493e03 to your computer and use it in GitHub Desktop.
simple 2^n FFT that isn't terribly slow
/** WASM-accelerated version. Except for very small sizes it's usually much faster,
and performs on the same ballpark as PulseFFT */
const wasmSrc = 'AGFzbQEAAAABCQFgBX9/f39/AAMCAQAFAwEAAAcVAghmZnRQaGFzZQAABm1lbW9yeQIACsQBAcEBAgZ/B30gACACdkECdCIGQQFrIgdBf3MhAiAAQQJ0IQADQCAAIAVKBEAgBSAHcSACIAVxIghBAXRyIgkgA2oiCioCACELIAoqAgQhDCAEIAVqIgogCyABIAhqIggqAgAiECADIAYgCWpqIgkqAgAiEZQgCCoCBCINIAkqAgQiDpSTIg+SOAIAIAogDCAQIA6UIA0gEZSSIg2SOAIEIAAgCmoiCCALIA+TOAIAIAggDCANkzgCBCAFQQhqIQUMAQsLCw=='
const wasmMod = new WebAssembly.Module(Uint8Array.from(atob(wasmSrc), x => x.charCodeAt(0)))
const { exports: { memory, fftPhase } } = new WebAssembly.Instance(wasmMod)
// dead simple memory allocator
let memoryCursor = 0
function allocate(size, cls, align=cls.BYTES_PER_ELEMENT) {
memoryCursor += ((-memoryCursor % align) + align) % align
const ptr = memoryCursor
memoryCursor += size * cls.BYTES_PER_ELEMENT
while (memory.buffer.byteLength < memoryCursor)
memory.grow(Math.max(1, memory.buffer.byteLength / (1<<16)))
return Object.assign(() => new cls(memory.buffer, ptr, size), { ptr })
}
/**
* Allocate resources for an FFT of a certain size.
*
* **Warning:** Every call to this function *permanently* allocates resources;
* you're expected to only call it once per size and reuse the returned FFT.
* If this is not desired, improve the simple allocator or use the pure JS version.
*
* @param {number} N - FFT points (must be a power of 2, and at least 2)
* @returns {(x: Float32Array | Float64Array) => Float32Array} FFT function.
* must be passed an array of length 2*N (real0, imag0, real1, imag1, ...).
* the argument will be left untouched and an array of the same length and layout
* will be returned with the result. the first complex number of the array is DC.
* **the buffer is reused/overwritten on future invocations of the same function.**
* **the buffer can become invalid when future calls to makeFFT itself are made,
* as these can grow the WASM memory and invalidate the underlying ArrayBuffer.**
*/
export default function makeFFT(N) {
if (N !== (N >>> 0))
throw new Error('not an u32')
const Nb = Math.log2(N) | 0
if (N !== (1 << Nb) || Nb < 1)
throw new Error('not a power of two, or too small')
// calculate the twiddle factors
const twiddle = allocate(N, Float32Array, 8)
{
const twiddleArr = twiddle()
for (let i = 0; i < N; i++) {
const arg = - 2 * Math.PI * i / N
twiddleArr[2*i+0] = Math.cos(arg)
twiddleArr[2*i+1] = Math.sin(arg)
}
}
let a = allocate(2*N, Float32Array, 8)
let b = allocate(2*N, Float32Array, 8)
return function fft(src) {
if (src.length !== 2*N)
throw new Error('invalid length')
a().set(src)
for (let idx = 0; idx < Nb; idx++) {
fftPhase(N, twiddle.ptr, idx, a.ptr, b.ptr)
; [a, b] = [b, a]
}
return a()
}
}
// This is the AssemblyScript code that compiled the WASM module embedded above.
// You don't actually need this to use the implementation.
export function fftPhase(
N: i32,
twiddle: usize, // &[u32; N]
idx: i32,
src: usize, // &[u32; 2*N]
dst: usize, // &mut [u32; 2*N]
): void {
N <<= 2
const stride = N >>> idx, mask = stride - 1, nmask = ~mask
for (let o = 0; o < N; o += 2 << 2) {
const i = o & nmask, O = o & mask
const i1 = (i<<1) | O, i2 = i1 + stride
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2]
// if JS had complexes: dst[o+N] = src[i1] - twiddle[i] * src[i2]
const ar = f32.load(src+i1+0), ai = f32.load(src+i1+4)
const tr = f32.load(twiddle+i+0), ti = f32.load(twiddle+i+4)
const sr = f32.load(src+i2+0), si = f32.load(src+i2+4)
const tsr = tr * sr - ti * si
const tsi = tr * si + ti * sr
f32.store(dst+o+0, ar + tsr)
f32.store(dst+o+4, ai + tsi)
f32.store(dst+o+N+0, ar - tsr)
f32.store(dst+o+N+4, ai - tsi)
}
}
/** Pure JS version. */
/**
* Allocate resources for an FFT of a certain size.
* @param {number} N - FFT points (must be a power of 2, and at least 2)
* @returns {(x: Float32Array | Float64Array) => Float32Array} FFT function.
* must be passed an array of length 2*N (real0, imag0, real1, imag1, ...).
* the argument will be left untouched and an array of the same length and layout
* will be returned with the result. the first complex number of the array is DC.
* **the buffer is reused/overwritten on future invocations of the same function.**
*/
export default function makeFFT(N) {
if (N !== (N >>> 0))
throw new Error('not an u32')
const Nb = Math.log2(N) | 0
if (N !== (1 << Nb) || Nb < 1)
throw new Error('not a power of two, or too small')
// calculate the twiddle factors
const twiddle = new Float32Array(N)
for (let i = 0; i < N/2; i++) {
const arg = - 2 * Math.PI * i / N
twiddle[2*i+0] = Math.cos(arg)
twiddle[2*i+1] = Math.sin(arg)
}
function phase(
/** @type {number} */ idx,
/** @type {Float64Array | Float32Array} */ src,
/** @type {Float64Array | Float32Array} */ dst,
) {
const stride = 1 << (Nb - idx), mask = stride - 1
for (let o = 0; o < N; o += 2) {
const i = o & ~mask, O = o & mask
const i1 = (i<<1) + O, i2 = i1 + stride
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2]
// dst[o+N] = src[i1] - twiddle[i] * src[i2]
const ar = src[i1+0], ai = src[i1+1]
const tr = twiddle[i+0], ti = twiddle[i+1]
const sr = src[i2+0], si = src[i2+1]
const tsr = tr * sr - ti * si
const tsi = tr * si + ti * sr
dst[o+0] = ar + tsr
dst[o+1] = ai + tsi
dst[o+N+0] = ar - tsr
dst[o+N+1] = ai - tsi
}
}
let a = new Float32Array(2*N)
let b = new Float32Array(2*N)
return function fft(src) {
if (src.length !== 2*N)
throw new Error('invalid length')
phase(0, src, a)
for (let idx = 1; idx < Nb; idx++) {
phase(idx, a, b)
; [a, b] = [b, a]
}
return a
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment