Skip to content

Instantly share code, notes, and snippets.

@mildsunrise
Last active September 10, 2025 01:21
Show Gist options
  • Save mildsunrise/d5990a0651009b8c96460c53a3493e03 to your computer and use it in GitHub Desktop.
Save mildsunrise/d5990a0651009b8c96460c53a3493e03 to your computer and use it in GitHub Desktop.
simple 2^n FFT that isn't terribly slow
/** WASM-accelerated version. Except for very small sizes it's usually much faster,
and performs on the same ballpark as PulseFFT. */
const wasmSrc = `
AGFzbQEAAAABDwJgBX9/f39/AGADf39/AAMGBQABAAEABQMBAAAHXwYMY29tcGxleFBoYXNlAAANcmVhbFBoYXNlSW5pdAABCXJlYWxQaGFzZQACFHJlYWxQaGFzZUluaXRJbnZlcnNl
AAMQcmVhbFBoYXNlSW52ZXJzZQAEBm1lbW9yeQIACusHBbwBAgZ/B30gAEECdCIGIAJ2IgJBAWsiB0F/cyEAA0AgBSAGSARAIAUgB3EgACAFcSIIQQF0ciIJIANqIgoqAgAhCyAKKgIE
IQwgBCAFaiIKIAsgASAIaiIIKgIAIhAgAyACIAlqaiIJKgIAIhGUIAgqAgQiDSAJKgIEIg6UkyIPkjgCACAKIAwgECAOlCANIBGUkiINkjgCBCAGIApqIgggCyAPkzgCACAIIAwgDZM4
AgQgBUEIaiEFDAELCwtPAgN/An0gAEEBdCEEA0AgAyAESARAIAIgA0EBdGoiBSABIANqIgAqAgAiBiAAIARqKgIAIgeSOAIAIAUgBiAHkzgCBCADQQRqIQMMAQsLC7wCAgl/B30gAEEB
dCIGIAJBAWt1IgJBAWsiB0F/cyEIA0AgAiAFSgRAIAMgBWoiACoCACEOIAAqAgQhDyADIAIgBWpqIgAqAgAhECAAKgIEIREgBCAFaiIAIA4gEJI4AgAgACAOIBCTOAIEIAAgBmoiACAP
OAIAIAAgEYw4AgQgBUEIaiEFDAELCyAGQQF0IQogAiEAA0AgACAGSARAIAAgB3EiCyAAIAhxIgVBAXRqIgwgA2oiCSoCACERIAkqAgQhDiAAIARqIgkgESABIAVqIg0qAgAiDyADIAIg
DGpqIgwqAgAiEJQgDSoCBCISIAwqAgQiE5STIhSSOAIAIAkgDiAPIBOUIBIgEJSSIg+SOAIEIAQgCiAFayALamoiBSARIBSTOAIAIAUgDyAOkzgCBCAAQQhqIQAMAQsLC08CA38CfSAA
QQF0IQQDQCADIARIBEAgAiADaiIFIAEgA0EBdGoiACoCACIGIAAqAgQiB5I4AgAgBCAFaiAGIAeTOAIAIANBBGohAwwBCwsLzAICCH8GfSAAQQF0IgYgAkEBa3UiAkEBayIHQX9zIQgD
QCACIAVKBEAgAyAFaiIAKgIAIQ0gACAGaiIJKgIAIQ4gCSoCBCEPIAQgBWoiCSANIAAqAgQiEJI4AgAgBCACIAVqaiIAIA0gEJM4AgAgCSAOIA6SOAIEIAAgD0MAAADAlDgCBCAFQQhq
IQUMAQsLIAZBAXQhCSACIQADQCAAIAZIBEAgACAHcSIKIAAgCHEiBUEBdGohCyAAIANqIgwqAgAhDyAMKgIEIRAgASAFaiIMKgIAIQ0gDCoCBCEOIAMgCSAFayAKamoiBSoCACERIAUq
AgQhEiAEIAtqIgUgDyARkjgCACAFIBAgEpM4AgQgBCACIAtqaiIFIA0gDyARkyIPlCAOIBAgEpIiEJSTOAIAIAUgDSAQlCAOIA+UkjgCBCAAQQhqIQAMAQsLCw==`
const wasmMod = new WebAssembly.Module(Uint8Array.from(atob(wasmSrc), x => x.charCodeAt(0)))
const { exports: { memory, complexPhase, realPhase, realPhaseInit, realPhaseInverse, realPhaseInitInverse } } = new WebAssembly.Instance(wasmMod)
// dead simple memory allocator
let memoryCursor = 0
function allocate(size, cls, align=cls.BYTES_PER_ELEMENT) {
memoryCursor += ((-memoryCursor % align) + align) % align
const ptr = memoryCursor
memoryCursor += size * cls.BYTES_PER_ELEMENT
while (memory.buffer.byteLength < memoryCursor)
memory.grow(Math.max(1, memory.buffer.byteLength / (1<<16)))
return Object.assign(() => new cls(memory.buffer, ptr, size), { ptr })
}
/**
* Allocate resources for an FFT of a certain size.
*
* **Warning:** Every call to this function *permanently* allocates resources;
* you're expected to only call it once per size and reuse the returned FFT.
* If this is not desired, improve the simple allocator or use the pure JS version.
*
* @param {number} N - FFT points (must be a power of 2, and at least 4)
* @param {boolean} [real] - pass true to request a real DFT (where
* only one half of the symmetrical spectrum is returned) instead of
* a regular (complex) DFT.
* @param {boolean} [inverse] - pass true to request a function that undoes
* the effects of the function that would otherwise be returned. In that
* case, mentally swap input and output in the `@returns` description below.
* **Note that it isn't entirely an inverse: after a roundtrip you'll get
* back the same values you started with but scaled by N.**
* @returns {(x: Float32Array | Float64Array) => Float32Array} FFT function.
* Called with an array with the time-domain samples, and returns an
* array of the same length with the frequency bins. The original array is
* left untouched, and **the returned array is reused/overwritten on future
* calls to the same FFT function.** **It can also become invalid when future
* calls to makeFFT itself are made, as these can grow the WASM memory and
* invalidate the underlying ArrayBuffer.**
*
* Each pair of consecutive floats in the returned array forms a complex number:
* `real0, imag0, real1, imag1, ...`.
* Depending on whether a complex or real FFT was requested, the following applies:
*
* - **Complex FFT:** This is the straightforward one. Here the input
* is a sequence of N complex samples, using the same interleaved layout
* as described above for the output, which contains the bins from 0 to N-1.
* This means both input and output are arrays of 2*N floats.
*
* - **Real FFT:** Here the input is a sequence of N real samples (each being
* one float, thus N floats) and the output contains the frequency bins from
* 0 (DC) to N/2 (Nyquist) inclusive. That's N/2+1 bins, but we use the same
* trick as FFTw to pack them into the space of just N/2 complex numbers:
* since bin 0 and bin N/2 are guaranteed to have a null imaginary part, we
* store the (real part of) bin N/2 where the imaginary part of bin 0 would go:
* `real0, realN/2, real1, imag1, real2, imag2, ... realN/2-1, imagN/2-1`.
* Thus, the output array also has N floats.
*/
export default function makeFFT(N, real=false, inverse=false) {
if (N !== (N >>> 0))
throw new Error('not an u32')
const Nb = Math.log2(N) | 0
if (N !== (1 << Nb) || Nb < 2)
throw new Error('not a power of two, or too small')
const bufSize = N * (real ? 1 : 2)
const sign = inverse ? 1 : -1
// calculate the twiddle factors
const twiddle = allocate(bufSize/2, Float32Array, 8)
{
const twiddleArr = twiddle()
for (let i = 0; i < bufSize/4; i++) {
const arg = sign * 2 * Math.PI * i / N
twiddleArr[2*i+0] = Math.cos(arg)
twiddleArr[2*i+1] = Math.sin(arg)
}
}
let a = allocate(bufSize, Float32Array, 8)
let b = allocate(bufSize, Float32Array, 8)
if (real && inverse) return function fft(src) {
if (src.length !== bufSize)
throw new Error('invalid length')
a().set(src)
for (let idx = Nb-1; idx > 0; idx--) {
realPhaseInverse(N, twiddle.ptr, idx, a.ptr, b.ptr)
; [a, b] = [b, a]
}
realPhaseInitInverse(N, a.ptr, b.ptr)
return b()
}
const phaseInit = real ? realPhaseInit : (N, a, b) => complexPhase(N, twiddle.ptr, 0, a, b)
const phase = real ? realPhase : complexPhase
return function fft(src) {
if (src.length !== bufSize)
throw new Error('invalid length')
a().set(src)
phaseInit(N, a.ptr, b.ptr)
; [a, b] = [b, a]
for (let idx = 1; idx < Nb; idx++) {
phase(N, twiddle.ptr, idx, a.ptr, b.ptr)
; [a, b] = [b, a]
}
return a()
}
}
// This is the AssemblyScript code that compiled the WASM module embedded above.
// You don't actually need this to use the implementation.
export function complexPhase(
N: i32,
twiddle: usize, // &[f32; N]
idx: i32,
src: usize, // &[f32; 2*N]
dst: usize, // &mut [f32; 2*N]
): void {
N <<= 2
const stride = N >>> idx, mask = stride - 1, nmask = ~mask
for (let o = 0; o < N; o += 2 << 2) {
const i = o & nmask, O = o & mask
const i1 = (i<<1) | O, i2 = i1 + stride
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2]
// if JS had complexes: dst[o+N] = src[i1] - twiddle[i] * src[i2]
const ar = f32.load(src+i1+0), ai = f32.load(src+i1+4)
const tr = f32.load(twiddle+i+0), ti = f32.load(twiddle+i+4)
const sr = f32.load(src+i2+0), si = f32.load(src+i2+4)
const tsr = tr * sr - ti * si
const tsi = tr * si + ti * sr
f32.store(dst+o+0, ar + tsr)
f32.store(dst+o+4, ai + tsi)
f32.store(dst+o+N+0, ar - tsr)
f32.store(dst+o+N+4, ai - tsi)
}
}
export function realPhaseInit(
N: i32,
src: usize, // &[f32; N]
dst: usize, // &mut [f32; N]
): void {
N <<= 1
for (let i = 0; i < N; i += 1 << 2) {
const o = i<<1
const a = f32.load(src+i+0), b = f32.load(src+i+N)
f32.store(dst+o+0, a + b)
f32.store(dst+o+4, a - b)
}
}
export function realPhase(
N: i32,
twiddle: usize, // &[f32; N/2]
idx: i32,
src: usize, // &[f32; N]
dst: usize, // &mut [f32; N]
): void {
N <<= 1
const stride = N >> (idx - 1), mask = stride - 1, nmask = ~mask
for (let o = 0; o < stride; o += 2 << 2) {
const i2 = o + stride
const aDC = f32.load(src+o+0), aNY = f32.load(src+o+4)
const bDC = f32.load(src+i2+0), bNY = f32.load(src+i2+4)
f32.store(dst+o+0, aDC + bDC)
f32.store(dst+o+4, aDC - bDC)
f32.store(dst+o+N+0, aNY)
f32.store(dst+o+N+4, -bNY)
}
const N2 = 2*N
for (let o = stride; o < N; o += 2 << 2) {
const i = o & nmask, O = o & mask
const o2 = N2 - i + O
const i1 = (i<<1) + O, i2 = i1 + stride
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2]
// dst[o2] = (src[i1] - twiddle[i] * src[i2]).conj
const ar = f32.load(src+i1+0), ai = f32.load(src+i1+4)
const tr = f32.load(twiddle+i+0), ti = f32.load(twiddle+i+4)
const sr = f32.load(src+i2+0), si = f32.load(src+i2+4)
const tsr = tr * sr - ti * si
const tsi = tr * si + ti * sr
f32.store(dst+o+0, ar + tsr)
f32.store(dst+o+4, ai + tsi)
f32.store(dst+o2+0, ar - tsr)
f32.store(dst+o2+4, tsi - ai)
}
}
export function realPhaseInitInverse(
N: i32,
src: usize, // &[f32; N]
dst: usize, // &mut [f32; N]
): void {
N <<= 1
for (let i = 0; i < N; i += 1 << 2) {
const o = i<<1
const a = f32.load(src+o+0), b = f32.load(src+o+4)
f32.store(dst+i+0, a + b)
f32.store(dst+i+N, a - b)
}
}
export function realPhaseInverse(
N: i32,
twiddle: usize, // &[f32; N/2]
idx: i32,
src: usize, // &[f32; N]
dst: usize, // &mut [f32; N]
): void {
N <<= 1
const stride = N >> (idx - 1), mask = stride - 1, nmask = ~mask
for (let o = 0; o < stride; o += 2 << 2) {
const i2 = o + stride
const DC = f32.load(src+o+0), NY = f32.load(src+o+4)
const mr = f32.load(src+o+N+0), mi = f32.load(src+o+N+4)
f32.store(dst+o+0, DC + NY)
f32.store(dst+i2+0, DC - NY)
f32.store(dst+o+4, 2*mr)
f32.store(dst+i2+4, -2*mi)
}
const N2 = 2*N
for (let o = stride; o < N; o += 2 << 2) {
const i = o & nmask, O = o & mask
const o2 = N2 - i + O
const i1 = (i<<1) + O, i2 = i1 + stride
// if JS had complexes: dst[i1] = src[o] + src[o2].conj
// dst[i2] = (src[o] - src[o2].conj) * twiddle[i]
const ar = f32.load(src+o+0), ai = f32.load(src+o+4)
const tr = f32.load(twiddle+i+0), ti = f32.load(twiddle+i+4)
const sr = f32.load(src+o2+0), si = f32.load(src+o2+4)
f32.store(dst+i1+0, ar + sr)
f32.store(dst+i1+4, ai - si)
const mr = ar - sr
const mi = ai + si
f32.store(dst+i2+0, tr * mr - ti * mi)
f32.store(dst+i2+4, tr * mi + ti * mr)
}
}
/** Pure JS version. */
/**
* Allocate resources for an FFT of a certain size.
* @param {number} N - FFT points (must be a power of 2, and at least 4)
* @param {boolean} [real] - pass true to request a real DFT (where
* only one half of the symmetrical spectrum is returned) instead of
* a regular (complex) DFT.
* @param {boolean} [inverse] - pass true to request a function that undoes
* the effects of the function that would otherwise be returned. In that
* case, mentally swap input and output in the `@returns` description below.
* **Note that it isn't entirely an inverse: after a roundtrip you'll get
* back the same values you started with but scaled by N.**
* @returns {(x: Float32Array | Float64Array) => Float32Array} FFT function.
* Called with an array with the time-domain samples, and returns an
* array of the same length with the frequency bins. The original array is
* left untouched, and **the returned array is reused/overwritten on future
* calls to the same FFT function.** Each pair of consecutive floats in the
* returned array forms a complex number: `real0, imag0, real1, imag1, ...`.
* Depending on whether a complex or real FFT was requested, the following applies:
*
* - **Complex FFT:** This is the straightforward one. Here the input
* is a sequence of N complex samples, using the same interleaved layout
* as described above for the output, which contains the bins from 0 to N-1.
* This means both input and output are arrays of 2*N floats.
*
* - **Real FFT:** Here the input is a sequence of N real samples (each being
* one float, thus N floats) and the output contains the frequency bins from
* 0 (DC) to N/2 (Nyquist) inclusive. That's N/2+1 bins, but we use the same
* trick as FFTw to pack them into the space of just N/2 complex numbers:
* since bin 0 and bin N/2 are guaranteed to have a null imaginary part, we
* store the (real part of) bin N/2 where the imaginary part of bin 0 would go:
* `real0, realN/2, real1, imag1, real2, imag2, ... realN/2-1, imagN/2-1`.
* Thus, the output array also has N floats.
*/
export default function makeFFT(N, real=false, inverse=false) {
if (N !== (N >>> 0))
throw new Error('not an u32')
const Nb = Math.log2(N) | 0
if (N !== (1 << Nb) || Nb < 2)
throw new Error('not a power of two, or too small')
const bufSize = N * (real ? 1 : 2)
const sign = inverse ? 1 : -1
// calculate the twiddle factors
const twiddle = new Float32Array(bufSize/2)
for (let i = 0; i < bufSize/4; i++) {
const arg = sign * 2 * Math.PI * i / N
twiddle[2*i+0] = Math.cos(arg)
twiddle[2*i+1] = Math.sin(arg)
}
function realPhaseInit(
/** @type {Float64Array | Float32Array} */ src,
/** @type {Float64Array | Float32Array} */ dst,
) {
const N = 1 << (Nb - 1)
for (let i = 0; i < N; i += 1) {
const o = i<<1
const a = src[i+0], b = src[i+N]
dst[o+0] = a + b
dst[o+1] = a - b
}
}
function realPhase(
/** @type {number} */ idx,
/** @type {Float64Array | Float32Array} */ src,
/** @type {Float64Array | Float32Array} */ dst,
) {
const N = 1 << (Nb - 1)
const stride = N >> (idx - 1), mask = stride - 1
for (let o = 0; o < stride; o += 2) {
const i2 = o + stride
const aDC = src[o+0], aNY = src[o+1]
const bDC = src[i2+0], bNY = src[i2+1]
dst[o+0] = aDC + bDC
dst[o+1] = aDC - bDC
dst[o+N+0] = aNY
dst[o+N+1] = -bNY
}
const N2 = 2*N
for (let o = stride; o < N; o += 2) {
const i = o & ~mask, O = o & mask
const o2 = N2 - i + O
const i1 = (i<<1) + O, i2 = i1 + stride
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2]
// dst[o2] = (src[i1] - twiddle[i] * src[i2]).conj
const ar = src[i1+0], ai = src[i1+1]
const tr = twiddle[i+0], ti = twiddle[i+1]
const sr = src[i2+0], si = src[i2+1]
const tsr = tr * sr - ti * si
const tsi = tr * si + ti * sr
dst[o+0] = ar + tsr
dst[o+1] = ai + tsi
dst[o2+0] = ar - tsr
dst[o2+1] = tsi - ai
}
}
function complexPhase(
/** @type {number} */ idx,
/** @type {Float64Array | Float32Array} */ src,
/** @type {Float64Array | Float32Array} */ dst,
) {
const stride = 1 << (Nb - idx), mask = stride - 1
for (let o = 0; o < N; o += 2) {
const i = o & ~mask, O = o & mask
const i1 = (i<<1) + O, i2 = i1 + stride
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2]
// dst[o+N] = src[i1] - twiddle[i] * src[i2]
const ar = src[i1+0], ai = src[i1+1]
const tr = twiddle[i+0], ti = twiddle[i+1]
const sr = src[i2+0], si = src[i2+1]
const tsr = tr * sr - ti * si
const tsi = tr * si + ti * sr
dst[o+0] = ar + tsr
dst[o+1] = ai + tsi
dst[o+N+0] = ar - tsr
dst[o+N+1] = ai - tsi
}
}
function realPhaseInitInverse(
/** @type {Float64Array | Float32Array} */ src,
/** @type {Float64Array | Float32Array} */ dst,
) {
const N = 1 << (Nb - 1)
for (let i = 0; i < N; i += 1) {
const o = i<<1
const a = src[o+0], b = src[o+1]
dst[i+0] = a + b
dst[i+N] = a - b
}
}
function realPhaseInverse(
/** @type {number} */ idx,
/** @type {Float64Array | Float32Array} */ src,
/** @type {Float64Array | Float32Array} */ dst,
) {
const N = 1 << (Nb - 1)
const stride = N >> (idx - 1), mask = stride - 1
for (let o = 0; o < stride; o += 2) {
const i2 = o + stride
const DC = src[o+0], NY = src[o+1]
const mr = src[o+N+0], mi = src[o+N+1]
dst[o+0] = DC + NY
dst[i2+0] = DC - NY
dst[o+1] = 2*mr
dst[i2+1] = -2*mi
}
const N2 = 2*N
for (let o = stride; o < N; o += 2) {
const i = o & ~mask, O = o & mask
const o2 = N2 - i + O
const i1 = (i<<1) + O, i2 = i1 + stride
// if JS had complexes: dst[i1] = src[o] + src[o2].conj
// dst[i2] = (src[o] - src[o2].conj) * twiddle[i]
const ar = src[o+0], ai = src[o+1]
const tr = twiddle[i+0], ti = twiddle[i+1]
const sr = src[o2+0], si = src[o2+1]
dst[i1+0] = ar + sr
dst[i1+1] = ai - si
const mr = ar - sr
const mi = ai + si
dst[i2+0] = tr * mr - ti * mi
dst[i2+1] = tr * mi + ti * mr
}
}
let a = new Float32Array(bufSize)
let b = new Float32Array(bufSize)
if (real && inverse) return function fft(src) {
if (src.length !== bufSize)
throw new Error('invalid length')
realPhaseInverse(Nb-1, src, a)
for (let idx = Nb-2; idx > 0; idx--) {
realPhaseInverse(idx, a, b)
; [a, b] = [b, a]
}
realPhaseInitInverse(a, b)
return b
}
const phaseInit = real ? realPhaseInit : (a, b) => complexPhase(0, a, b)
const phase = real ? realPhase : complexPhase
return function fft(src) {
if (src.length !== bufSize)
throw new Error('invalid length')
phaseInit(src, a)
for (let idx = 1; idx < Nb; idx++) {
phase(idx, a, b)
; [a, b] = [b, a]
}
return a
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment