Skip to content

Instantly share code, notes, and snippets.

@ingoogni
Last active May 22, 2025 15:03
Show Gist options
  • Save ingoogni/8107ec525f495a3a9bb96bfb1f183b27 to your computer and use it in GitHub Desktop.
Save ingoogni/8107ec525f495a3a9bb96bfb1f183b27 to your computer and use it in GitHub Desktop.
Goertzel, Rotating Vector vs. sin() Compare sine wave oscillators for speed and accuracy.
# Goertzel oscillator bank (scalar):
# (seconds: 208, nanosecond: 253406100)
# duration: 4440s samplerate: 44100 samples: 195804000
# 0.0000010635809590202449 seconds per sample @ 1000 oscillators
# Goertzel AVX oscillator bank:
# (seconds: 26, nanosecond: 403254200)
# duration: 4440s samplerate: 44100 samples: 195804000
# 0.00000013484532593818306 seconds per sample @ 1000 oscillators
# Speedup: 7.887414351371886x
import std/[math]
import nimsimd/fma
import nimsimd/avx2
when defined(gcc) or defined(clang):
{.localPassc: "-mavx2".}
{.localPassc: "-mfma".}
{.localPassC: "-msse4.1 -msha".}
when defined(vcc):
{.localPassC: "/arch:SSE4.2 /arch:AVX2".}
const
SampleRate {.intdefine.} = 44100
SRate* = SampleRate.float
type
GoertzelOscBank* = object
multiplier: seq[float32]
current*: seq[float32]
previous: seq[float32]
d0: seq[float32]
proc initGOscBank*(frequency: seq[float32], phase: seq[float32]): GoertzelOscBank =
let ts = TAU / SRate
var gob = GoertzelOscBank(
multiplier: newSeq[float32](frequency.len),
current: newSeq[float32](frequency.len),
previous: newSeq[float32](frequency.len),
d0: newSeq[float32](frequency.len),
)
for i in 0..<frequency.len:
let phaseIncrement = ts * frequency[i]
gob.multiplier[i] = cos(phaseIncrement) * 2
gob.current[i] = sin(phase[i] - phaseIncrement)
gob.previous[i] = sin(phase[i] - 2 * phaseIncrement)
return gob
proc next*(oscbank: var GoertzelOscBank) =
for i in 0..<oscbank.d0.len:
oscbank.d0[i] = oscbank.multiplier[i] * oscbank.current[i] - oscbank.previous[i]
oscbank.previous[i] = oscbank.current[i]
oscbank.current[i] = oscbank.d0[i]
proc nextAVX*(oscbank: var GoertzelOscBank) =
let len = oscbank.d0.len
let simd_len = (len div 8) * 8
# Process 8 elements at once
for i in countup(0, simd_len - 1, 8):
# Load 8 values at once
let multVec = mm256_loadu_ps(cast[ptr float32](oscbank.multiplier[i].addr))
let currVec = mm256_loadu_ps(cast[ptr float32](oscbank.current[i].addr))
let prevVec = mm256_loadu_ps(cast[ptr float32](oscbank.previous[i].addr))
# d0 = multiplier * current - previous
let d0Vec = mm256_fmsub_ps(multVec, currVec, prevVec)
# d0 results
mm256_storeu_ps(cast[ptr float32](oscbank.d0[i].addr), d0Vec)
# Update: previous = current & current = d0
mm256_storeu_ps(cast[ptr float32](oscbank.previous[i].addr), currVec)
mm256_storeu_ps(cast[ptr float32](oscbank.current[i].addr), d0Vec)
# Scalar version for remaining elements
for i in simd_len..<len:
oscbank.d0[i] = oscbank.multiplier[i] * oscbank.current[i] - oscbank.previous[i]
oscbank.previous[i] = oscbank.current[i]
oscbank.current[i] = oscbank.d0[i]
when isMainModule:
import std/[monotimes]
func lmap*(t, minin, maxin, minout, maxout: float): float {.inline.} =
((t - minin) / (maxin - minin)) * (maxout - minout) + minout
let
duration = 4440 # 74 min ~ 1 CD
ticks = duration * SampleRate
nOsc = 1000
var freq = newSeq[float32](nOsc)
for i in 0..<freq.len:
freq[i] = 20.0 + i.float
var phase = newSeq[float32](nOsc)
for i in 0..<freq.len:
phase[i] = lmap(i.float32, -PI, PI, 0.0, ticks.float32)
# Test scalar version
var oscbank = initGOscBank(freq, phase)
let t0 = getMonoTime()
for i in 0..<ticks:
oscbank.next
let t1 = getMonoTime()
let run = (ticks(t1) - ticks(t0)).float / 1000000000
echo "\nGoertzel oscillator bank (scalar):"
echo t1 - t0
echo "duration: ", duration, "s samplerate: ", SampleRate, " samples: ", ticks
echo run / ticks.float, " seconds per sample @ ", nOsc, " oscillators"
# AVX version
oscbank = initGOscBank(freq, phase)
let t3 = getMonoTime()
for i in 0..<ticks:
oscbank.nextAVX
let t4 = getMonoTime()
let runavx = (ticks(t4) - ticks(t3)).float / 1000000000
echo "\nGoertzel AVX oscillator bank:"
echo t4 - t3
echo "duration: ", duration, "s samplerate: ", SampleRate, " samples: ", ticks
echo runavx / ticks.float, " seconds per sample @ ", nOsc, " oscillators"
echo "Speedup: ", run / runavx, "x"
#nim.cfg
#-d:release
#-d:danger
#-d:samplerate=44100
# Result (12th Gen Intel(R) Core(TM) i9-12900 2.40 GHz):
#
# GSinOsc : (seconds: 0, nanosecond: 516204700)
# RVSinOsc : (seconds: 1, nanosecond: 672888300)
# STDSinOsc: (seconds: 5, nanosecond: 908334300)
# MSerror Goertzel vs sin() : 2.108412096000499e-15
# MSerror Rotating vec vs sin(): 0.001964335532599123
import std/[complex, math]
const samplerate {.intdefine.} = 44100
type
GSinOsc* = object #inverse / reverse Goertzel
multiplier: float
current*: float
previous: float
n: int
RvSinOsc = object #rotating vector
multiplier: Complex64
value*: Complex64
func initGsinOsc(frequency: float, phase: float = 0.0): GSinOsc =
let
phaseIncrement = frequency * Tau / samplerate.float
GSinosc(
multiplier: 2 * cos(phaseIncrement),
current: sin(phase),
previous: sin(phase - phaseIncrement),
n: 0
)
func next(osc: var GSinOsc){.inline.} =
if osc.n > 0:
let d0 = osc.multiplier * osc.current - osc.previous
osc.previous = osc.current
osc.current = d0
else:
inc osc.n
func initRvSinOsc(frequency:float, samplerate:int): RvSinOsc =
let phaseIncrement = frequency * Tau / samplerate.float
RvSinOsc(
multiplier: complex64(cos(phaseIncrement), sin(phaseIncrement)),
value: complex64(1.0, 0.0)
)
func next(osc: var RvSinOsc){.inline.} =
osc.value = osc.value * osc.multiplier
when isMainModule:
import std/[monotimes]
func mse(s1, s2: seq[float]):float =
var mseSum = 0.0
for i in 0..<s1.len:
let diff = s1[i] - s2[i]
mseSum += float(diff * diff)
mseSum / float(s1.len)
let t = 4440 #seconds, is 74 min, is cd length.
let ticks = t * samplerate
let frequency = 440.0
var
gsinOsc = initGsinOsc(frequency)
gbuff = newSeq[float](ticks)
let t0 = getMonoTime()
for i in 0..<ticks:
gsinOsc.next()
gbuff[i] = gsinOsc.current
echo "GSinOsc : ", getMonoTime() - t0
var
rvsinOsc = initRvSinOsc(frequency, samplerate)
rvbuff = newSeq[float](ticks)
let t1 = getMonoTime()
for i in 0..<ticks:
rvsinOsc.next()
rvbuff[i] = rvsinOsc.value.im
echo "RVSinOsc : ", getMonoTime() - t1
var
stdbuff = newSeq[float](ticks)
let
phaseIncrement = frequency * Tau / samplerate.float
t2 = getMonoTime()
for tick in 0..<ticks:
stdbuff[tick] = sin(tick.float * phaseIncrement)
echo "STDSinOsc: ", getMonoTime() - t2
echo "MSerror Goertzel vs sin() : ", mse(stdbuff, gbuff)
echo "MSerror Rotating vec vs sin(): ", mse(stdbuff, rvbuff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment