Last active
May 22, 2025 15:03
-
-
Save ingoogni/8107ec525f495a3a9bb96bfb1f183b27 to your computer and use it in GitHub Desktop.
Goertzel, Rotating Vector vs. sin() Compare sine wave oscillators for speed and accuracy.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Goertzel oscillator bank (scalar): | |
# (seconds: 208, nanosecond: 253406100) | |
# duration: 4440s samplerate: 44100 samples: 195804000 | |
# 0.0000010635809590202449 seconds per sample @ 1000 oscillators | |
# Goertzel AVX oscillator bank: | |
# (seconds: 26, nanosecond: 403254200) | |
# duration: 4440s samplerate: 44100 samples: 195804000 | |
# 0.00000013484532593818306 seconds per sample @ 1000 oscillators | |
# Speedup: 7.887414351371886x | |
import std/[math] | |
import nimsimd/fma | |
import nimsimd/avx2 | |
when defined(gcc) or defined(clang): | |
{.localPassc: "-mavx2".} | |
{.localPassc: "-mfma".} | |
{.localPassC: "-msse4.1 -msha".} | |
when defined(vcc): | |
{.localPassC: "/arch:SSE4.2 /arch:AVX2".} | |
const | |
SampleRate {.intdefine.} = 44100 | |
SRate* = SampleRate.float | |
type | |
GoertzelOscBank* = object | |
multiplier: seq[float32] | |
current*: seq[float32] | |
previous: seq[float32] | |
d0: seq[float32] | |
proc initGOscBank*(frequency: seq[float32], phase: seq[float32]): GoertzelOscBank = | |
let ts = TAU / SRate | |
var gob = GoertzelOscBank( | |
multiplier: newSeq[float32](frequency.len), | |
current: newSeq[float32](frequency.len), | |
previous: newSeq[float32](frequency.len), | |
d0: newSeq[float32](frequency.len), | |
) | |
for i in 0..<frequency.len: | |
let phaseIncrement = ts * frequency[i] | |
gob.multiplier[i] = cos(phaseIncrement) * 2 | |
gob.current[i] = sin(phase[i] - phaseIncrement) | |
gob.previous[i] = sin(phase[i] - 2 * phaseIncrement) | |
return gob | |
proc next*(oscbank: var GoertzelOscBank) = | |
for i in 0..<oscbank.d0.len: | |
oscbank.d0[i] = oscbank.multiplier[i] * oscbank.current[i] - oscbank.previous[i] | |
oscbank.previous[i] = oscbank.current[i] | |
oscbank.current[i] = oscbank.d0[i] | |
proc nextAVX*(oscbank: var GoertzelOscBank) = | |
let len = oscbank.d0.len | |
let simd_len = (len div 8) * 8 | |
# Process 8 elements at once | |
for i in countup(0, simd_len - 1, 8): | |
# Load 8 values at once | |
let multVec = mm256_loadu_ps(cast[ptr float32](oscbank.multiplier[i].addr)) | |
let currVec = mm256_loadu_ps(cast[ptr float32](oscbank.current[i].addr)) | |
let prevVec = mm256_loadu_ps(cast[ptr float32](oscbank.previous[i].addr)) | |
# d0 = multiplier * current - previous | |
let d0Vec = mm256_fmsub_ps(multVec, currVec, prevVec) | |
# d0 results | |
mm256_storeu_ps(cast[ptr float32](oscbank.d0[i].addr), d0Vec) | |
# Update: previous = current & current = d0 | |
mm256_storeu_ps(cast[ptr float32](oscbank.previous[i].addr), currVec) | |
mm256_storeu_ps(cast[ptr float32](oscbank.current[i].addr), d0Vec) | |
# Scalar version for remaining elements | |
for i in simd_len..<len: | |
oscbank.d0[i] = oscbank.multiplier[i] * oscbank.current[i] - oscbank.previous[i] | |
oscbank.previous[i] = oscbank.current[i] | |
oscbank.current[i] = oscbank.d0[i] | |
when isMainModule: | |
import std/[monotimes] | |
func lmap*(t, minin, maxin, minout, maxout: float): float {.inline.} = | |
((t - minin) / (maxin - minin)) * (maxout - minout) + minout | |
let | |
duration = 4440 # 74 min ~ 1 CD | |
ticks = duration * SampleRate | |
nOsc = 1000 | |
var freq = newSeq[float32](nOsc) | |
for i in 0..<freq.len: | |
freq[i] = 20.0 + i.float | |
var phase = newSeq[float32](nOsc) | |
for i in 0..<freq.len: | |
phase[i] = lmap(i.float32, -PI, PI, 0.0, ticks.float32) | |
# Test scalar version | |
var oscbank = initGOscBank(freq, phase) | |
let t0 = getMonoTime() | |
for i in 0..<ticks: | |
oscbank.next | |
let t1 = getMonoTime() | |
let run = (ticks(t1) - ticks(t0)).float / 1000000000 | |
echo "\nGoertzel oscillator bank (scalar):" | |
echo t1 - t0 | |
echo "duration: ", duration, "s samplerate: ", SampleRate, " samples: ", ticks | |
echo run / ticks.float, " seconds per sample @ ", nOsc, " oscillators" | |
# AVX version | |
oscbank = initGOscBank(freq, phase) | |
let t3 = getMonoTime() | |
for i in 0..<ticks: | |
oscbank.nextAVX | |
let t4 = getMonoTime() | |
let runavx = (ticks(t4) - ticks(t3)).float / 1000000000 | |
echo "\nGoertzel AVX oscillator bank:" | |
echo t4 - t3 | |
echo "duration: ", duration, "s samplerate: ", SampleRate, " samples: ", ticks | |
echo runavx / ticks.float, " seconds per sample @ ", nOsc, " oscillators" | |
echo "Speedup: ", run / runavx, "x" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#nim.cfg | |
#-d:release | |
#-d:danger | |
#-d:samplerate=44100 | |
# Result (12th Gen Intel(R) Core(TM) i9-12900 2.40 GHz): | |
# | |
# GSinOsc : (seconds: 0, nanosecond: 516204700) | |
# RVSinOsc : (seconds: 1, nanosecond: 672888300) | |
# STDSinOsc: (seconds: 5, nanosecond: 908334300) | |
# MSerror Goertzel vs sin() : 2.108412096000499e-15 | |
# MSerror Rotating vec vs sin(): 0.001964335532599123 | |
import std/[complex, math] | |
const samplerate {.intdefine.} = 44100 | |
type | |
GSinOsc* = object #inverse / reverse Goertzel | |
multiplier: float | |
current*: float | |
previous: float | |
n: int | |
RvSinOsc = object #rotating vector | |
multiplier: Complex64 | |
value*: Complex64 | |
func initGsinOsc(frequency: float, phase: float = 0.0): GSinOsc = | |
let | |
phaseIncrement = frequency * Tau / samplerate.float | |
GSinosc( | |
multiplier: 2 * cos(phaseIncrement), | |
current: sin(phase), | |
previous: sin(phase - phaseIncrement), | |
n: 0 | |
) | |
func next(osc: var GSinOsc){.inline.} = | |
if osc.n > 0: | |
let d0 = osc.multiplier * osc.current - osc.previous | |
osc.previous = osc.current | |
osc.current = d0 | |
else: | |
inc osc.n | |
func initRvSinOsc(frequency:float, samplerate:int): RvSinOsc = | |
let phaseIncrement = frequency * Tau / samplerate.float | |
RvSinOsc( | |
multiplier: complex64(cos(phaseIncrement), sin(phaseIncrement)), | |
value: complex64(1.0, 0.0) | |
) | |
func next(osc: var RvSinOsc){.inline.} = | |
osc.value = osc.value * osc.multiplier | |
when isMainModule: | |
import std/[monotimes] | |
func mse(s1, s2: seq[float]):float = | |
var mseSum = 0.0 | |
for i in 0..<s1.len: | |
let diff = s1[i] - s2[i] | |
mseSum += float(diff * diff) | |
mseSum / float(s1.len) | |
let t = 4440 #seconds, is 74 min, is cd length. | |
let ticks = t * samplerate | |
let frequency = 440.0 | |
var | |
gsinOsc = initGsinOsc(frequency) | |
gbuff = newSeq[float](ticks) | |
let t0 = getMonoTime() | |
for i in 0..<ticks: | |
gsinOsc.next() | |
gbuff[i] = gsinOsc.current | |
echo "GSinOsc : ", getMonoTime() - t0 | |
var | |
rvsinOsc = initRvSinOsc(frequency, samplerate) | |
rvbuff = newSeq[float](ticks) | |
let t1 = getMonoTime() | |
for i in 0..<ticks: | |
rvsinOsc.next() | |
rvbuff[i] = rvsinOsc.value.im | |
echo "RVSinOsc : ", getMonoTime() - t1 | |
var | |
stdbuff = newSeq[float](ticks) | |
let | |
phaseIncrement = frequency * Tau / samplerate.float | |
t2 = getMonoTime() | |
for tick in 0..<ticks: | |
stdbuff[tick] = sin(tick.float * phaseIncrement) | |
echo "STDSinOsc: ", getMonoTime() - t2 | |
echo "MSerror Goertzel vs sin() : ", mse(stdbuff, gbuff) | |
echo "MSerror Rotating vec vs sin(): ", mse(stdbuff, rvbuff) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment