Last active
August 25, 2025 10:49
-
-
Save mildsunrise/d5990a0651009b8c96460c53a3493e03 to your computer and use it in GitHub Desktop.
simple 2^n FFT that isn't terribly slow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** WASM-accelerated version. Except for very small sizes it's usually much faster, | |
and performs on the same ballpark as PulseFFT */ | |
const wasmSrc = 'AGFzbQEAAAABCQFgBX9/f39/AAMCAQAFAwEAAAcVAghmZnRQaGFzZQAABm1lbW9yeQIACsQBAcEBAgZ/B30gACACdkECdCIGQQFrIgdBf3MhAiAAQQJ0IQADQCAAIAVKBEAgBSAHcSACIAVxIghBAXRyIgkgA2oiCioCACELIAoqAgQhDCAEIAVqIgogCyABIAhqIggqAgAiECADIAYgCWpqIgkqAgAiEZQgCCoCBCINIAkqAgQiDpSTIg+SOAIAIAogDCAQIA6UIA0gEZSSIg2SOAIEIAAgCmoiCCALIA+TOAIAIAggDCANkzgCBCAFQQhqIQUMAQsLCw==' | |
const wasmMod = new WebAssembly.Module(Uint8Array.from(atob(wasmSrc), x => x.charCodeAt(0))) | |
const { exports: { memory, fftPhase } } = new WebAssembly.Instance(wasmMod) | |
// dead simple memory allocator | |
let memoryCursor = 0 | |
function allocate(size, cls, align=cls.BYTES_PER_ELEMENT) { | |
memoryCursor += ((-memoryCursor % align) + align) % align | |
const ptr = memoryCursor | |
memoryCursor += size * cls.BYTES_PER_ELEMENT | |
while (memory.buffer.byteLength < memoryCursor) | |
memory.grow(Math.max(1, memory.buffer.byteLength / (1<<16))) | |
return Object.assign(() => new cls(memory.buffer, ptr, size), { ptr }) | |
} | |
/** | |
* Allocate resources for an FFT of a certain size. | |
* | |
* **Warning:** Every call to this function *permanently* allocates resources; | |
* you're expected to only call it once per size and reuse the returned FFT. | |
* If this is not desired, improve the simple allocator or use the pure JS version. | |
* | |
* @param {number} N - FFT points (must be a power of 2, and at least 2) | |
* @returns {(x: Float32Array | Float64Array) => Float32Array} FFT function. | |
* must be passed an array of length 2*N (real0, imag0, real1, imag1, ...). | |
* the argument will be left untouched and an array of the same length and layout | |
* will be returned with the result. the first complex number of the array is DC. | |
* **the buffer is reused/overwritten on future invocations of the same function.** | |
* **the buffer can become invalid when future calls to makeFFT itself are made, | |
* as these can grow the WASM memory and invalidate the underlying ArrayBuffer.** | |
*/ | |
export default function makeFFT(N) { | |
if (N !== (N >>> 0)) | |
throw new Error('not an u32') | |
const Nb = Math.log2(N) | 0 | |
if (N !== (1 << Nb) || Nb < 1) | |
throw new Error('not a power of two, or too small') | |
// calculate the twiddle factors | |
const twiddle = allocate(N, Float32Array, 8) | |
{ | |
const twiddleArr = twiddle() | |
for (let i = 0; i < N; i++) { | |
const arg = - 2 * Math.PI * i / N | |
twiddleArr[2*i+0] = Math.cos(arg) | |
twiddleArr[2*i+1] = Math.sin(arg) | |
} | |
} | |
let a = allocate(2*N, Float32Array, 8) | |
let b = allocate(2*N, Float32Array, 8) | |
return function fft(src) { | |
if (src.length !== 2*N) | |
throw new Error('invalid length') | |
a().set(src) | |
for (let idx = 0; idx < Nb; idx++) { | |
fftPhase(N, twiddle.ptr, idx, a.ptr, b.ptr) | |
; [a, b] = [b, a] | |
} | |
return a() | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is the AssemblyScript code that compiled the WASM module embedded above. | |
// You don't actually need this to use the implementation. | |
export function fftPhase( | |
N: i32, | |
twiddle: usize, // &[u32; N] | |
idx: i32, | |
src: usize, // &[u32; 2*N] | |
dst: usize, // &mut [u32; 2*N] | |
): void { | |
N <<= 2 | |
const stride = N >>> idx, mask = stride - 1, nmask = ~mask | |
for (let o = 0; o < N; o += 2 << 2) { | |
const i = o & nmask, O = o & mask | |
const i1 = (i<<1) | O, i2 = i1 + stride | |
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2] | |
// if JS had complexes: dst[o+N] = src[i1] - twiddle[i] * src[i2] | |
const ar = f32.load(src+i1+0), ai = f32.load(src+i1+4) | |
const tr = f32.load(twiddle+i+0), ti = f32.load(twiddle+i+4) | |
const sr = f32.load(src+i2+0), si = f32.load(src+i2+4) | |
const tsr = tr * sr - ti * si | |
const tsi = tr * si + ti * sr | |
f32.store(dst+o+0, ar + tsr) | |
f32.store(dst+o+4, ai + tsi) | |
f32.store(dst+o+N+0, ar - tsr) | |
f32.store(dst+o+N+4, ai - tsi) | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Pure JS version. */ | |
/** | |
* Allocate resources for an FFT of a certain size. | |
* @param {number} N - FFT points (must be a power of 2, and at least 2) | |
* @returns {(x: Float32Array | Float64Array) => Float32Array} FFT function. | |
* must be passed an array of length 2*N (real0, imag0, real1, imag1, ...). | |
* the argument will be left untouched and an array of the same length and layout | |
* will be returned with the result. the first complex number of the array is DC. | |
* **the buffer is reused/overwritten on future invocations of the same function.** | |
*/ | |
export default function makeFFT(N) { | |
if (N !== (N >>> 0)) | |
throw new Error('not an u32') | |
const Nb = Math.log2(N) | 0 | |
if (N !== (1 << Nb) || Nb < 1) | |
throw new Error('not a power of two, or too small') | |
// calculate the twiddle factors | |
const twiddle = new Float32Array(N) | |
for (let i = 0; i < N/2; i++) { | |
const arg = - 2 * Math.PI * i / N | |
twiddle[2*i+0] = Math.cos(arg) | |
twiddle[2*i+1] = Math.sin(arg) | |
} | |
function phase( | |
/** @type {number} */ idx, | |
/** @type {Float64Array | Float32Array} */ src, | |
/** @type {Float64Array | Float32Array} */ dst, | |
) { | |
const stride = 1 << (Nb - idx), mask = stride - 1 | |
for (let o = 0; o < N; o += 2) { | |
const i = o & ~mask, O = o & mask | |
const i1 = (i<<1) + O, i2 = i1 + stride | |
// if JS had complexes: dst[o] = src[i1] + twiddle[i] * src[i2] | |
// dst[o+N] = src[i1] - twiddle[i] * src[i2] | |
const ar = src[i1+0], ai = src[i1+1] | |
const tr = twiddle[i+0], ti = twiddle[i+1] | |
const sr = src[i2+0], si = src[i2+1] | |
const tsr = tr * sr - ti * si | |
const tsi = tr * si + ti * sr | |
dst[o+0] = ar + tsr | |
dst[o+1] = ai + tsi | |
dst[o+N+0] = ar - tsr | |
dst[o+N+1] = ai - tsi | |
} | |
} | |
let a = new Float32Array(2*N) | |
let b = new Float32Array(2*N) | |
return function fft(src) { | |
if (src.length !== 2*N) | |
throw new Error('invalid length') | |
phase(0, src, a) | |
for (let idx = 1; idx < Nb; idx++) { | |
phase(idx, a, b) | |
; [a, b] = [b, a] | |
} | |
return a | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment