Last active
April 2, 2024 02:48
-
-
Save mrbid/8a74f4d66ffcc6e869d3e3ab4502a277 to your computer and use it in GitHub Desktop.
RPROP in C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
    James William Fletcher (github.com/mrbid) & Test_User (notabug.org/Test_User)
    April 2024

    A sort of RPROP in C.
    Train the network to output TRAIN_MAX when {3,3,3} is input and TRAIN_MIN otherwise.

    Check the last sign of the unit: if both signs are the same, move in the direction
    of the last sign; if the signs differ, don't update the weight, just update the sign.
    Increments are in 1's only; no magnitude. Some networks accumulate magnitude the more
    you move in the same direction with no change of sign — int8 would explode too fast.

    Problem:
    - Multiplying two sint8 values quickly overflows.
    - On x86, integers wrap when they overflow; casting and branching are needed to prevent this,
      or use https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
    - https://stackoverflow.com/questions/38625857/using-the-builtin-function-builtin-add-overflow-p-in-gcc

    This system would require no cyclic learning rate and no skip connections; maybe dropout.

    sint8 range: -128 to 127

    gcc rprop.c -Ofast -ggdb3 -o rprop
    valgrind -s --track-origins=yes --leak-check=full --show-leak-kinds=all ./rprop
*/
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#define uint unsigned int | |
#define sint8 int8_t // signed int8 | |
#define UNITS 16 | |
#define EPOCHS 1000000 | |
#define TRAIN_MAX 64 | |
#define TRAIN_MIN -64 | |
#define ReLU_LEAK 0 | |
// Saturating int8 multiply: returns x*y, clamped to 127 / -128 on overflow.
sint8 cap_multiply(sint8 x, sint8 y) {
    sint8 product;
    if (!__builtin_mul_overflow(x, y, &product))
        return product;
    // overflow: saturate toward the sign of the true product
    // (overflow is impossible when either operand is 0, so sign tests suffice)
    return ((x > 0) == (y > 0)) ? 127 : -128;
}
// Saturating int8 add: returns x+y, clamped to 127 / -128 on overflow.
sint8 cap_add(sint8 x, sint8 y) {
    sint8 sum;
    const int overflowed = __builtin_add_overflow(x, y, &sum);
    if (!overflowed)
        return sum;
    // addition only overflows when both operands pull the same way, so x's sign picks the cap
    return x > 0 ? 127 : -128;
}
// Saturating int8 subtract: returns x-y, clamped to 127 / -128 on overflow.
sint8 cap_sub(sint8 x, sint8 y) {
    sint8 diff;
    const int overflowed = __builtin_sub_overflow(x, y, &diff);
    if (!overflowed)
        return diff;
    // x - y can only overflow upward when x >= 0 (with y < 0), and downward when x < 0
    return x >= 0 ? 127 : -128;
}
// Fully-connected network: 3 inputs -> UNITS -> UNITS -> UNITS -> 1 output.
// All weights and biases are signed 8-bit; arithmetic on them saturates via cap_*().
typedef struct
{
    // weights (units, weights)
    sint8 l1w[UNITS][3];     // layer 1: UNITS units, 3 input weights each
    sint8 l2w[UNITS][UNITS]; // layer 2: UNITS units fed by layer-1 outputs
    sint8 l3w[UNITS][UNITS]; // layer 3: UNITS units fed by layer-2 outputs
    sint8 l4w[1][UNITS];     // output layer: a single unit fed by layer-3 outputs
    // bias' (units, bias)
    sint8 l1b[UNITS][1];     // one bias per layer-1 unit
    sint8 l2b[UNITS][1];     // one bias per layer-2 unit
    sint8 l3b[UNITS][1];     // one bias per layer-3 unit
    sint8 l4b;               // output unit bias
} network;
network net; // define network as net (single global instance; zero/init'd in main)
void dumpWeights() | |
{ | |
puts("l1w:"); | |
for(int i = 0; i < UNITS; i++) | |
for(int j = 0; j < 3; j++) | |
printf("%i ", net.l1w[i][j]); | |
printf("\n\n"); | |
puts("l2w:"); | |
for(int i = 0; i < UNITS; i++) | |
for(int j = 0; j < UNITS; j++) | |
printf("%i ", net.l2w[i][j]); | |
printf("\n\n"); | |
puts("l3w:"); | |
for(int i = 0; i < UNITS; i++) | |
for(int j = 0; j < UNITS; j++) | |
printf("%i ", net.l3w[i][j]); | |
printf("\n\n"); | |
puts("l4w:"); | |
for(int i = 0; i < UNITS; i++) | |
printf("%i ", net.l3w[0][i]); | |
printf("\n\n"); | |
} | |
void layerStat() | |
{ | |
int min=999, max=0, avg=0, avgd=0; | |
for(int i = 0; i < UNITS; i++) | |
{ | |
for(int j = 0; j < 3; j++) | |
{ | |
if(net.l1w[i][j] > max){max = net.l1w[i][j];} | |
if(net.l1w[i][j] < min){min = net.l1w[i][j];} | |
avg += net.l1w[i][j]; | |
avgd++; | |
} | |
} | |
avg /= avgd; | |
printf("l1w: %i %i %i [%i]\n", min, avg/avgd, max, avg); | |
min=999, max=0, avg=0, avgd=0; | |
for(int i = 0; i < UNITS; i++) | |
{ | |
for(int j = 0; j < UNITS; j++) | |
{ | |
if(net.l2w[i][j] > max){max = net.l2w[i][j];} | |
if(net.l2w[i][j] < min){min = net.l2w[i][j];} | |
avg += net.l2w[i][j]; | |
avgd++; | |
} | |
} | |
avg /= avgd; | |
printf("l2w: %i %i %i [%i]\n", min, avg/avgd, max, avg); | |
min=999, max=0, avg=0, avgd=0; | |
for(int i = 0; i < UNITS; i++) | |
{ | |
for(int j = 0; j < UNITS; j++) | |
{ | |
if(net.l3w[i][j] > max){max = net.l3w[i][j];} | |
if(net.l3w[i][j] < min){min = net.l3w[i][j];} | |
avg += net.l3w[i][j]; | |
avgd++; | |
} | |
} | |
avg /= avgd; | |
printf("l3w: %i %i %i [%i]\n", min, avg/avgd, max, avg); | |
min=999, max=0, avg=0, avgd=0; | |
for(int i = 0; i < UNITS; i++) | |
{ | |
if(net.l3w[0][i] > max){max = net.l3w[0][i];} | |
if(net.l3w[0][i] < min){min = net.l3w[0][i];} | |
avg += net.l3w[0][i]; | |
avgd++; | |
} | |
avg /= avgd; | |
printf("l3w: %i %i %i [%i]\n", min, avg/avgd, max, avg); | |
} | |
// ReLU activation with a leaky floor: values below ReLU_LEAK are clamped up to ReLU_LEAK.
sint8 ReLU(sint8 s)
{
    if(s >= ReLU_LEAK){return s;}
    return ReLU_LEAK;
}
// Derivative of the ReLU: 1 for positive activations, 0 otherwise.
sint8 ReLU_D(sint8 s)
{
    if(s > 0){return 1;}
    return 0;
}
// RPROP optimiser step: sign (+1/-1) of the gradient input*error,
// with a zero product treated as positive.
sint8 RPROP(signed int input, signed int error)
{
    if(input * error < 0){return -1;}
    return 1;
}
// Random signed int8, covering the full range -128..127.
// The original `(sint8)((sint8)(rand()%255))-128` drew from rand()%255
// (0..254), so after shifting it could never produce 127, and the nested
// casts relied on implementation-defined signed narrowing. Draw from the
// full 256-value range instead.
sint8 srnd8()
{
    return (sint8)((rand() % 256) - 128);
}
// Random nonzero signed int8 for weight initialisation
// (resamples until a nonzero value is drawn).
sint8 srnd8_weight()
{
    sint8 w;
    do { w = srnd8(); } while(w == 0);
    return w;
}
int main() | |
{ | |
// seed rand | |
srand(777); | |
// random weights | |
for(uint i = 0; i < UNITS; i++) | |
for(uint j = 0; j < 3; j++) {net.l1w[i][j] = srnd8_weight(); net.l1b[i][0] = 0;} | |
for(uint i = 0; i < UNITS; i++) | |
for(uint j = 0; j < UNITS; j++) {net.l2w[i][j] = srnd8_weight(); net.l2b[i][0] = 0;} | |
for(uint i = 0; i < UNITS; i++) | |
for(uint j = 0; j < UNITS; j++) {net.l3w[i][j] = srnd8_weight(); net.l3b[i][0] = 0;} | |
for(uint i = 0; i < UNITS; i++) {net.l4w[0][i] = srnd8_weight();} | |
net.l4b = 0; | |
//dumpWeights(); | |
// training switch | |
unsigned char ts = 0; | |
// sign tracking for each weight and bias for backprop | |
sint8 l1ws[UNITS][3] = {0}; | |
sint8 l2ws[UNITS][UNITS] = {0}; | |
sint8 l3ws[UNITS][UNITS] = {0}; | |
sint8 l4ws[1][UNITS] = {0}; | |
sint8 l1bs[UNITS][1] = {0}; | |
sint8 l2bs[UNITS][1] = {0}; | |
sint8 l3bs[UNITS][1] = {0}; | |
sint8 l4bs = 0; | |
// train | |
for(uint e = 0; e < EPOCHS; e++) | |
{ | |
// forward pass input gen | |
sint8 input[3]; | |
if(ts == 0) | |
{ | |
input[0] = 3; | |
input[1] = 3; | |
input[2] = 3; | |
} | |
else | |
{ | |
do | |
{ | |
input[0] = srnd8(); | |
input[1] = srnd8(); | |
input[2] = srnd8(); | |
} | |
while(input[0] == 3 && input[1] == 3 && input[2] == 3); | |
} | |
// --- | |
// -------- | |
// --- | |
// forward pass | |
sint8 l1o[UNITS]; | |
for(uint i = 0; i < UNITS; i++) | |
l1o[i] = ReLU(cap_add(cap_add(cap_multiply(input[0], net.l1w[i][0]), cap_multiply(input[1], net.l1w[i][1])), cap_add(cap_multiply(input[2], net.l1w[i][2]), net.l1b[i][0]))); | |
sint8 l2o[UNITS]; | |
for(uint i = 0; i < UNITS; i++) | |
{ | |
sint8 wa = 0; | |
for(uint j = 0; j < UNITS; j++) | |
wa = cap_add(wa, cap_multiply(l1o[j], net.l2w[i][j])); | |
l2o[i] = ReLU(cap_add(wa, net.l2b[i][0])); | |
} | |
sint8 l3o[UNITS]; | |
for(uint i = 0; i < UNITS; i++) | |
{ | |
sint8 wa = 0; | |
for(uint j = 0; j < UNITS; j++) | |
wa = cap_add(wa, cap_multiply(l2o[j], net.l3w[i][j])); | |
l3o[i] = ReLU(cap_add(wa, net.l3b[i][0])); | |
} | |
sint8 l4o = 0; | |
for(uint i = 0; i < UNITS; i++) | |
l4o = cap_add(l4o, cap_multiply(l2o[i], net.l4w[0][i])); | |
l4o = cap_add(l4o, net.l4b); | |
// calc slope of loss | |
sint8 loss; | |
if(ts == 0) | |
loss = cap_sub(TRAIN_MAX, l4o); | |
else | |
loss = cap_sub(TRAIN_MIN, l4o); | |
if(ts == 0){layerStat();} | |
if(ts == 0){printf("[%u] [%i] 333: %i (%i)\n", e, ts, l4o, loss);} | |
else {printf("[%u] [%i] not: %i (%i)\n", e, ts, l4o, loss);} | |
// --- | |
// -------- | |
// --- | |
// backpass calc error/gradient buffers | |
signed int ler; // storing cumulative error per layer | |
// --- | |
// get cumulative error of single output unit (layer4) | |
ler = 0; | |
for(uint i = 0; i < UNITS; i++){ler += net.l4w[0][i] * loss;} | |
ler += net.l4b * loss; | |
// set unit error of lower layer3 units | |
signed int l3e[UNITS]; | |
for(uint i = 0; i < UNITS; i++){l3e[i] = ReLU_D(l3o[i]) * ler;} | |
// --- | |
// get cumulative error of layer3 units | |
ler = 0; | |
for(uint i = 0; i < UNITS; i++) | |
{ | |
for(uint j = 0; j < UNITS; j++) | |
ler += net.l3w[i][j] * l3e[i]; | |
ler += net.l3b[i][0] * l3e[i]; | |
} | |
// set each unit error of lower layer2 units | |
signed int l2e[UNITS]; | |
for(uint i = 0; i < UNITS; i++){l2e[i] = ReLU_D(l2o[i]) * ler;} | |
// --- | |
// get cumulative error of layer2 units | |
ler = 0; | |
for(uint i = 0; i < UNITS; i++) | |
{ | |
for(uint j = 0; j < UNITS; j++) | |
ler += net.l2w[i][j] * l2e[i]; | |
ler += net.l2b[i][0] * l2e[i]; | |
} | |
// set unit error of lower layer1 units | |
signed int l1e[UNITS]; | |
for(uint i = 0; i < UNITS; i++){l1e[i] = ReLU_D(l1o[i]) * ler;} | |
// --- | |
// -------- | |
// --- | |
// forwardpass nudge weights based on error/gradient sign | |
for(uint i = 0; i < UNITS; i++) // layer1 | |
{ | |
for(uint j = 0; j < 3; j++) | |
{ | |
// nudge weight by sign direction | |
sint8 ns = RPROP(input[j], l1e[i]); | |
if(ns != l1ws[i][j]){l1ws[i][j] = ns;} // if different update sign | |
else{net.l1w[i][j] = cap_add(net.l1w[i][j], ns);} // if same move in direction of gradient | |
// nudge bias by sign direction | |
ns = RPROP(1, l1e[i]); | |
if(ns != l1bs[i][0]){l1bs[i][0] = ns;} // if different update sign | |
else{net.l1b[i][j] = cap_add(net.l1b[i][j], ns);} // if same move in direction of gradient | |
} | |
} | |
for(uint i = 0; i < UNITS; i++) // layer2 | |
{ | |
for(uint j = 0; j < UNITS; j++) | |
{ | |
// nudge weight by sign direction | |
sint8 ns = RPROP(l1o[j], l2e[i]); | |
if(ns != l2ws[i][j]){l2ws[i][j] = ns;} // if different update sign | |
else{net.l2w[i][j] = cap_add(net.l2w[i][j], ns);} // if same move in direction of gradient | |
// nudge bias by sign direction | |
ns = RPROP(1, l2e[i]); | |
if(ns != l2bs[i][0]){l2bs[i][0] = ns;} // if different update sign | |
else{net.l2b[i][j] = cap_add(net.l2b[i][j], ns);} // if same move in direction of gradient | |
} | |
} | |
for(uint i = 0; i < UNITS; i++) // layer3 | |
{ | |
for(uint j = 0; j < UNITS; j++) | |
{ | |
// nudge weight by sign direction | |
sint8 ns = RPROP(l2o[j], l3e[i]); | |
if(ns != l3ws[i][j]){l3ws[i][j] = ns;} // if different update sign | |
else{net.l3w[i][j] = cap_add(net.l3w[i][j], ns);} // if same move in direction of gradient | |
// nudge bias by sign direction | |
ns = RPROP(1, l3e[i]); | |
if(ns != l3bs[i][0]){l3bs[i][0] = ns;} // if different update sign | |
else{net.l3b[i][j] = cap_add(net.l3b[i][j], ns);} // if same move in direction of gradient | |
} | |
} | |
for(uint i = 0; i < UNITS; i++) // layer4 (output) | |
{ | |
// nudge weight by sign direction | |
sint8 ns = RPROP(l3o[i], loss); | |
if(ns != l4ws[0][i]){l4ws[0][i] = ns;} // if different update sign | |
else{net.l4w[0][i] = cap_add(net.l4w[0][i], ns);} // if same move in direction of gradient | |
// nudge bias by sign direction | |
ns = RPROP(1, loss); | |
if(ns != l4bs){l4bs = ns;} // if different update sign | |
else{net.l4b = cap_add(net.l4b, ns);} // if same move in direction of gradient | |
} | |
// --- | |
// -------- | |
// --- | |
// Auto kill if its stops optimising after x epoches | |
static sint8 laslos = 0, laslosc = 0; | |
if(ts == 1) | |
{ | |
if(loss == laslos){laslosc++;} | |
else | |
{ | |
laslos = loss; | |
laslosc = 0; | |
} | |
if(laslosc > 9){return 0;} | |
} | |
// input flip switch | |
ts = 1 - ts; | |
if(ts == 0){puts("");} | |
} | |
// done | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment