Skip to content

Instantly share code, notes, and snippets.

@Protonk
Last active December 5, 2024 21:19
Show Gist options
  • Save Protonk/a96a317dcc6a381b834f36a1abd275ed to your computer and use it in GitHub Desktop.
Save Protonk/a96a317dcc6a381b834f36a1abd275ed to your computer and use it in GitHub Desktop.
Kadlec pools without converging
#include <time.h>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include <stdlib.h>
// setting a relatively low tolerance allows this behavior
// to be seen more easily, though it will work for
// higher numbers
#define FLOAT_TOL 0.00001f
#define MAX_NR 95
// choosing a lower range doesn't seem to matter
#define LOCAL_FLOAT_START 2.01f
#define LOCAL_FLOAT_END 3.99f
// Floats aren't uniform but their distribution doesn't matter for this
// example.
float uniformRange (float min, float max) {
double x;
float Urand;
x = (double)rand() / (double)((unsigned)RAND_MAX + 1);
Urand = (float) x;
return (max - min) * Urand + min;
}
// See https://web.archive.org/web/20220826232306/http://rrrola.wz.cz/inv_sqrt.html
// Optimal Newton-Raphson / magic constant combination
// found via brute force search of the 32-bit integer space
// Unusual among FISR derivatives in that it restructures the NR
// step.
// Prints output, which shows that after 2 NR
// iterations the function gets stuck at just above the
// threshold error will fall through to MAX_NR
int main() {
srand(time(NULL));
printf("input, guess, step, error, draw, iters\n");
int magic = 0x5F1FFFF9;
float result, error, guess, step_NR;
for (int draw = 0; draw < 50; draw++) {
float input = uniformRange(LOCAL_FLOAT_START, LOCAL_FLOAT_END);
float reference = 1.0f / sqrtf(input);
union { float f; uint32_t u; } y = {input};
y.u = magic - (y.u >> 1);
int iterations = 1;
// The behavior persists even if you restructure the loop
// or adjust the precision of the chosen constants
do {
guess = y.f;
for (int i = 0; i < iterations; i++) {
y.f = 0.703952253f * y.f * (2.38924456f - input * y.f * y.f);
}
result = y.f;
error = (fabsf(result - reference) / reference);
step_NR = fabsf(guess - result);
// Unless we converge, print output, which includes the input draw we are on
// so we can follow the individual iterations; the value of the guess; AND
// the difference between the guess and the NR output (which will trend to 0)
if ( error <= FLOAT_TOL) {
break;
} else {
printf("%e,%e,%e,%e,%d,%d\n", input, guess, step_NR, error, draw, iterations);
}
iterations++;
} while (iterations < MAX_NR);
}
return 0;
}
library(ggplot2)
tested <- read.csv("~/Your/File/here.csv")
## easier to show in tables
tested <- tested %>%
mutate_if(is.numeric, round, digits=4)
## if you want to play in R with it:
kadlec <- function(input, guess) {
return(0.703952253 * guess * (2.38924456 - input * guess * guess))
}
## some ways to see:
with(tested, table(guess, step))
ggplot(tested, aes(x = iters, y = guess, group = draw)) + geom_line()
nr_param <- read.csv(text = "
input,version,iters,output,error
0.0125,2,1,10.288025856,1.343753815
0.0125,2,2,10.007713318,1.063441277
0.0125,2,3,10.130422592,1.186150551
0.0125,2,4,10.080792427,1.136520386
0.0125,2,5,10.101626396,1.157354355
0.0125,2,6,10.093007088,1.148735046
0.0125,3,1,11.675456047,2.731184006
0.0125,3,2,10.207276344,1.263004303
0.0125,3,3,11.428794861,2.484522820
0.0125,3,4,10.489167213,1.544895172
0.0125,3,5,11.276955605,2.332683563
0.0125,3,6,10.646800995,1.702528954
0.0125,4,1,13.027447701,4.083175659
0.0125,4,2,9.237071037,0.292798996
0.0125,4,3,13.045556068,4.101284027
0.0125,4,4,9.198582649,0.254310608
0.0125,4,5,13.045590401,4.101318359
0.0125,4,6,9.198509216,0.254237175
0.0125,5,1,14.338942528,5.394670486
0.0125,5,2,6.810316086,2.133955956
0.0125,5,3,13.024043083,4.079771042
0.0125,5,4,10.416212082,1.471940041
0.0125,5,5,14.078761101,5.134489059
0.0125,5,6,7.626488686,1.317783356
0.0125,6,1,15.605068207,6.660796165
0.0125,6,2,2.673947096,6.270324707
0.0125,6,3,6.602978230,2.341293812
0.0125,6,4,13.965343475,5.021071434
0.0125,6,5,8.976306915,0.032034874
0.0125,6,6,15.758172989,6.813900948
0.0125,7,1,16.821132660,7.876860619
0.0125,7,2,-3.376579523,12.320851326
0.0125,7,3,-8.873324394,17.817596436
0.0125,7,4,-17.094469070,26.038742065
0.0125,7,5,5.082199574,3.862072468
0.0125,7,6,12.591745377,3.647473335
0.0125,8,1,17.982629776,9.038357735
0.0125,8,2,-11.483901978,20.428173065
0.0125,8,3,-17.100805283,26.045078278
0.0125,8,4,5.042374134,3.901897907
0.0125,8,5,13.472912788,4.528640747
0.0125,8,6,12.631697655,3.687425613
")
version_colors <- colorRampPalette(c("#2171b5", "#6baed6", "#bdd7e7", "#f4a582", "#d6604d", "#b2182b"))(10)
nr_param %>%
ggplot(aes(x = iters, y = error, color = factor(version), group = version)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
scale_color_manual(values = version_colors) +
labs(x = "Iterations",
y = "Error",
title = "Performance of Different Versions Across Iterations",
subtitle = "Input: 0.0125",
color = "Version") +
theme_minimal() +
theme(legend.position = "right")
kadlec <- read.csv(text = "
reference,kadlec,iters,error,input
1.189297,1.189539,1,0.000242,0.707000
1.189297,1.162983,2,0.026314,0.707000
1.189297,1.173180,3,0.016117,0.707000
1.189297,1.169557,4,0.019740,0.707000
1.189297,1.170886,5,0.018411,0.707000
1.189297,1.170404,6,0.018893,0.707000
")
# Create the plot
ggplot(kadlec, aes(x = iters)) +
geom_hline(aes(yintercept = reference, color = "Reference", linetype = "Reference"), size = 1) +
geom_hline(aes(yintercept = reference + 0.01, color = "Tolerance", linetype = "Tolerance"),
size = 1.2) +
geom_hline(aes(yintercept = reference - 0.01, color = "Tolerance", linetype = "Tolerance"),
size = 1.2) +
geom_line(aes(y = kadlec, color = "Kadlec Approximation", linetype = "Kadlec Approximation"), size = 1) +
geom_point(aes(y = kadlec, color = "Kadlec Approximation"), size = 3) +
scale_color_manual(values = c("Reference" = "blue", "Tolerance" = "blue", "Kadlec Approximation" = "black")) +
scale_linetype_manual(values = c("Reference" = "solid", "Tolerance" = "dotted", "Kadlec Approximation" = "solid")) +
labs(x = "Iterations", y = "Value",
title = "After 1 iteration, output is \"stuck\" at a value outside tolerance",
subtitle = paste("Input:", kadlec$input[1]),
color = "Legend", linetype = "Legend") +
theme(legend.position = "bottom",
legend.title = element_blank()) +
guides(color = guide_legend(override.aes = list(shape = c(NA, NA, 16))))
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
float modifiedNRISR(float input, int NR, int version) {
// Interpolate magic number between standard and Kadlec
uint32_t magic_std = 0x5F400000;
uint32_t magic_kad = 0x5F1FFFF9;
float t = (version - 1) / 9.0f;
uint32_t magic = magic_std - (uint32_t)(t * (magic_std - magic_kad));
// Initial guess using interpolated magic
union { float f; uint32_t u; } y = {input};
y.u = magic - (y.u >> 1);
if (version == 1) {
// Standard Newton-Raphson
while (NR > 0) {
y.f = y.f * (1.5f - 0.5f * input * y.f * y.f);
NR--;
}
} else if (version == 10) {
// Pure Kadlec
while (NR > 0) {
y.f = 0.703952253f * y.f * (2.38924456f - input * y.f * y.f);
NR--;
}
} else {
// Interpolated versions (2-9)
float outer = 1.5f + (1.87f * t); // 1.5 to 3.37
float inner = 0.5f + (0.5f * t); // 0.5 to 1.0
while (NR > 0) {
y.f = y.f * (outer - inner * input * y.f * y.f);
NR--;
}
}
return y.f;
}
int main() {
// Print CSV header
printf("input,version,iters,output,error\n");
float input = 0.0125f;
float reference = 1.0f / sqrtf(input);
// Test each version
for (int version = 2; version <= 8; version++) {
// Test each iteration count
for (int iters = 1; iters <= 6; iters++) {
float result = modifiedNRISR(input, iters, version);
float error = fabsf(result - reference);
printf("%.4f,%d,%d,%.9f,%.9f\n",
input, version, iters, result, error);
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment