Last active
December 5, 2024 21:19
-
-
Save Protonk/a96a317dcc6a381b834f36a1abd275ed to your computer and use it in GitHub Desktop.
Kadlec pools without converging
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <time.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <math.h> | |
#include <stdlib.h> | |
// setting a relatively low tolerance allows this behavior | |
// to be seen more easily, though it will work for | |
// higher numbers | |
#define FLOAT_TOL 0.00001f | |
#define MAX_NR 95 | |
// choosing a lower range doesn't seem to matter | |
#define LOCAL_FLOAT_START 2.01f | |
#define LOCAL_FLOAT_END 3.99f | |
// Floats aren't uniform but their distribution doesn't matter for this | |
// example. | |
float uniformRange (float min, float max) { | |
double x; | |
float Urand; | |
x = (double)rand() / (double)((unsigned)RAND_MAX + 1); | |
Urand = (float) x; | |
return (max - min) * Urand + min; | |
} | |
// See https://web.archive.org/web/20220826232306/http://rrrola.wz.cz/inv_sqrt.html | |
// Optimal Newton-Raphson / magic constant combination | |
// found via brute force search of the 32-bit integer space | |
// Unusual among FISR derivatives in that it restructures the NR | |
// step. | |
// Prints output, which shows that after 2 NR | |
// iterations the function gets stuck at just above the | |
// threshold error will fall through to MAX_NR | |
int main() { | |
srand(time(NULL)); | |
printf("input, guess, step, error, draw, iters\n"); | |
int magic = 0x5F1FFFF9; | |
float result, error, guess, step_NR; | |
for (int draw = 0; draw < 50; draw++) { | |
float input = uniformRange(LOCAL_FLOAT_START, LOCAL_FLOAT_END); | |
float reference = 1.0f / sqrtf(input); | |
union { float f; uint32_t u; } y = {input}; | |
y.u = magic - (y.u >> 1); | |
int iterations = 1; | |
// The behavior persists even if you restructure the loop | |
// or adjust the precision of the chosen constants | |
do { | |
guess = y.f; | |
for (int i = 0; i < iterations; i++) { | |
y.f = 0.703952253f * y.f * (2.38924456f - input * y.f * y.f); | |
} | |
result = y.f; | |
error = (fabsf(result - reference) / reference); | |
step_NR = fabsf(guess - result); | |
// Unless we converge, print output, which includes the input draw we are on | |
// so we can follow the individual iterations; the value of the guess; AND | |
// the difference between the guess and the NR output (which will trend to 0) | |
if ( error <= FLOAT_TOL) { | |
break; | |
} else { | |
printf("%e,%e,%e,%e,%d,%d\n", input, guess, step_NR, error, draw, iterations); | |
} | |
iterations++; | |
} while (iterations < MAX_NR); | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
tested <- read.csv("~/Your/File/here.csv") | |
## easier to show in tables | |
tested <- tested %>% | |
mutate_if(is.numeric, round, digits=4) | |
## if you want to play in R with it: | |
kadlec <- function(input, guess) { | |
return(0.703952253 * guess * (2.38924456 - input * guess * guess)) | |
} | |
## some ways to see: | |
with(tested, table(guess, step)) | |
ggplot(tested, aes(x = iters, y = guess, group = draw)) + geom_line() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nr_param <- read.csv(text = " | |
input,version,iters,output,error | |
0.0125,2,1,10.288025856,1.343753815 | |
0.0125,2,2,10.007713318,1.063441277 | |
0.0125,2,3,10.130422592,1.186150551 | |
0.0125,2,4,10.080792427,1.136520386 | |
0.0125,2,5,10.101626396,1.157354355 | |
0.0125,2,6,10.093007088,1.148735046 | |
0.0125,3,1,11.675456047,2.731184006 | |
0.0125,3,2,10.207276344,1.263004303 | |
0.0125,3,3,11.428794861,2.484522820 | |
0.0125,3,4,10.489167213,1.544895172 | |
0.0125,3,5,11.276955605,2.332683563 | |
0.0125,3,6,10.646800995,1.702528954 | |
0.0125,4,1,13.027447701,4.083175659 | |
0.0125,4,2,9.237071037,0.292798996 | |
0.0125,4,3,13.045556068,4.101284027 | |
0.0125,4,4,9.198582649,0.254310608 | |
0.0125,4,5,13.045590401,4.101318359 | |
0.0125,4,6,9.198509216,0.254237175 | |
0.0125,5,1,14.338942528,5.394670486 | |
0.0125,5,2,6.810316086,2.133955956 | |
0.0125,5,3,13.024043083,4.079771042 | |
0.0125,5,4,10.416212082,1.471940041 | |
0.0125,5,5,14.078761101,5.134489059 | |
0.0125,5,6,7.626488686,1.317783356 | |
0.0125,6,1,15.605068207,6.660796165 | |
0.0125,6,2,2.673947096,6.270324707 | |
0.0125,6,3,6.602978230,2.341293812 | |
0.0125,6,4,13.965343475,5.021071434 | |
0.0125,6,5,8.976306915,0.032034874 | |
0.0125,6,6,15.758172989,6.813900948 | |
0.0125,7,1,16.821132660,7.876860619 | |
0.0125,7,2,-3.376579523,12.320851326 | |
0.0125,7,3,-8.873324394,17.817596436 | |
0.0125,7,4,-17.094469070,26.038742065 | |
0.0125,7,5,5.082199574,3.862072468 | |
0.0125,7,6,12.591745377,3.647473335 | |
0.0125,8,1,17.982629776,9.038357735 | |
0.0125,8,2,-11.483901978,20.428173065 | |
0.0125,8,3,-17.100805283,26.045078278 | |
0.0125,8,4,5.042374134,3.901897907 | |
0.0125,8,5,13.472912788,4.528640747 | |
0.0125,8,6,12.631697655,3.687425613 | |
") | |
version_colors <- colorRampPalette(c("#2171b5", "#6baed6", "#bdd7e7", "#f4a582", "#d6604d", "#b2182b"))(10) | |
nr_param %>% | |
ggplot(aes(x = iters, y = error, color = factor(version), group = version)) + | |
geom_line(linewidth = 1) + | |
geom_point(size = 3) + | |
scale_color_manual(values = version_colors) + | |
labs(x = "Iterations", | |
y = "Error", | |
title = "Performance of Different Versions Across Iterations", | |
subtitle = "Input: 0.0125", | |
color = "Version") + | |
theme_minimal() + | |
theme(legend.position = "right") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
kadlec <- read.csv(text = " | |
reference,kadlec,iters,error,input | |
1.189297,1.189539,1,0.000242,0.707000 | |
1.189297,1.162983,2,0.026314,0.707000 | |
1.189297,1.173180,3,0.016117,0.707000 | |
1.189297,1.169557,4,0.019740,0.707000 | |
1.189297,1.170886,5,0.018411,0.707000 | |
1.189297,1.170404,6,0.018893,0.707000 | |
") | |
# Create the plot | |
ggplot(kadlec, aes(x = iters)) + | |
geom_hline(aes(yintercept = reference, color = "Reference", linetype = "Reference"), size = 1) + | |
geom_hline(aes(yintercept = reference + 0.01, color = "Tolerance", linetype = "Tolerance"), | |
size = 1.2) + | |
geom_hline(aes(yintercept = reference - 0.01, color = "Tolerance", linetype = "Tolerance"), | |
size = 1.2) + | |
geom_line(aes(y = kadlec, color = "Kadlec Approximation", linetype = "Kadlec Approximation"), size = 1) + | |
geom_point(aes(y = kadlec, color = "Kadlec Approximation"), size = 3) + | |
scale_color_manual(values = c("Reference" = "blue", "Tolerance" = "blue", "Kadlec Approximation" = "black")) + | |
scale_linetype_manual(values = c("Reference" = "solid", "Tolerance" = "dotted", "Kadlec Approximation" = "solid")) + | |
labs(x = "Iterations", y = "Value", | |
title = "After 1 iteration, output is \"stuck\" at a value outside tolerance", | |
subtitle = paste("Input:", kadlec$input[1]), | |
color = "Legend", linetype = "Legend") + | |
theme(legend.position = "bottom", | |
legend.title = element_blank()) + | |
guides(color = guide_legend(override.aes = list(shape = c(NA, NA, 16)))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdio.h> | |
#include <time.h> | |
#include <math.h> | |
float modifiedNRISR(float input, int NR, int version) { | |
// Interpolate magic number between standard and Kadlec | |
uint32_t magic_std = 0x5F400000; | |
uint32_t magic_kad = 0x5F1FFFF9; | |
float t = (version - 1) / 9.0f; | |
uint32_t magic = magic_std - (uint32_t)(t * (magic_std - magic_kad)); | |
// Initial guess using interpolated magic | |
union { float f; uint32_t u; } y = {input}; | |
y.u = magic - (y.u >> 1); | |
if (version == 1) { | |
// Standard Newton-Raphson | |
while (NR > 0) { | |
y.f = y.f * (1.5f - 0.5f * input * y.f * y.f); | |
NR--; | |
} | |
} else if (version == 10) { | |
// Pure Kadlec | |
while (NR > 0) { | |
y.f = 0.703952253f * y.f * (2.38924456f - input * y.f * y.f); | |
NR--; | |
} | |
} else { | |
// Interpolated versions (2-9) | |
float outer = 1.5f + (1.87f * t); // 1.5 to 3.37 | |
float inner = 0.5f + (0.5f * t); // 0.5 to 1.0 | |
while (NR > 0) { | |
y.f = y.f * (outer - inner * input * y.f * y.f); | |
NR--; | |
} | |
} | |
return y.f; | |
} | |
int main() { | |
// Print CSV header | |
printf("input,version,iters,output,error\n"); | |
float input = 0.0125f; | |
float reference = 1.0f / sqrtf(input); | |
// Test each version | |
for (int version = 2; version <= 8; version++) { | |
// Test each iteration count | |
for (int iters = 1; iters <= 6; iters++) { | |
float result = modifiedNRISR(input, iters, version); | |
float error = fabsf(result - reference); | |
printf("%.4f,%d,%d,%.9f,%.9f\n", | |
input, version, iters, result, error); | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment