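/*
 * A minimal feedforward neural network in plain C: two inputs, two hidden
 * units, one output. It learns the six basic two-input logic gates (AND,
 * OR, XOR, NAND, NOR, XNOR) by gradient descent on a numerically estimated
 * gradient. w[0..5] are the connection weights; w[6..8] are the biases.
 */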
#include "stdio.h" | |
#include "stdlib.h" | |
#include "math.h" | |
#include "time.h" | |
#define N 9 | |
double in1, in2; | |
double w[N]; | |
int train_loops = 100000; | |
double activation( double sum)
{
    return (tanh(sum) + 1.0) / 2.0; // tanh squashed into (0, 1)
    /* ReLU activation - not used right now (kept for reference):
    if( sum > 0)
        return sum;
    else
        return 0;
    */
}
double a_out() // input a
{
    return in1;
}

double b_out() // input b
{
    return in2;
}

double c_out() // hidden unit c
{
    double dot = a_out()*w[0] + b_out()*w[2] + w[6];
    return activation(dot);
}

double d_out() // hidden unit d
{
    double dot = a_out()*w[1] + b_out()*w[3] + w[7];
    return activation(dot);
}

double y() // output unit
{
    double dot = c_out()*w[4] + d_out()*w[5] + w[8];
    return activation(dot);
}
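/*
 * For reference, the wiring implemented by the functions above
 * (fully connected, 2 inputs -> 2 hidden -> 1 output):
 *
 *   c = activation( a*w[0] + b*w[2] + w[6] )
 *   d = activation( a*w[1] + b*w[3] + w[7] )
 *   y = activation( c*w[4] + d*w[5] + w[8] )
 */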
double magnitude( const double* vector)
{
    int i;
    double magnitude = 0.0;
    for( i = 0; i < N; i++)
    {
        magnitude += vector[i]*vector[i]; // squared distance from the origin
    }
    return sqrt(magnitude);
}
void normalize( const double* vector, double* out)
{
    double mag;
    int i;
    mag = magnitude(vector);
    if( mag == 0.0)
    {
        for( i = 0; i < N; i++)
        {
            out[i] = 0; // the zero vector cannot be normalized; leave it zero
        }
        return;
    }
    for( i = 0; i < N; i++)
    {
        out[i] = vector[i] / mag; // make its magnitude 1
    }
}
void scale( double* c, const double* a, double b) // c = a * b
{
    int i;
    for( i = 0; i < N; i++)
    {
        c[i] = a[i] * b;
    }
}

void subtract( double* c, const double* a, const double* b) // c = a - b
{
    int i;
    for( i = 0; i < N; i++)
    {
        c[i] = a[i] - b[i];
    }
}

void add( double* c, const double* a, const double* b) // c = a + b
{
    int i;
    for( i = 0; i < N; i++)
    {
        c[i] = a[i] + b[i];
    }
}
void copy( double* a, const double* b) // a = b
{
    int i;
    for( i = 0; i < N; i++)
    {
        a[i] = b[i];
    }
}

void print( double* vec)
{
    int i;
    for( i = 0; i < N; i++)
    {
        printf( "%d: %f \n", i, vec[i]);
    }
    printf( "\n");
}
void init_network()
{
    int i;
    for( i = 0; i < N; i++)
    {
        // random magnitude in [0.2, 0.24), random sign
        if( rand() % 2)
            w[i] = ((rand() % 10000) * 0.0002) * 0.02 + 0.2;
        else
            w[i] = -((rand() % 10000) * 0.0002) * 0.02 - 0.2;
    }
    printf( "network initialized. \n");
}
double compute_loss( double target)
{
    double err = y() - target;
    // squared error, treated as zero once it drops to 0.1 or below
    if( err*err > 0.1)
        return err*err;
    return 0;
}
//TODO:
void compute_backprop_gradient( double* gradient, double target)
{
}
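/*
 * A sketch of the analytic gradient the TODO above asks for, assuming the
 * tanh-based activation() currently in use and the plain squared-error part
 * of compute_loss() (its 0.1 dead zone is ignored). The helper names below
 * are illustrative additions; nothing in this file calls them.
 */
double d_activation( double sum) // derivative of (tanh(s) + 1) / 2
{
    double t = tanh(sum);
    return (1.0 - t*t) / 2.0;
}

void compute_backprop_gradient_sketch( double* gradient, double target)
{
    // forward pass, keeping the pre-activation sums
    double sc = in1*w[0] + in2*w[2] + w[6];
    double sd = in1*w[1] + in2*w[3] + w[7];
    double c = activation(sc);
    double d = activation(sd);
    double sy = c*w[4] + d*w[5] + w[8];
    double out = activation(sy);
    // backward pass: chain rule through the output, then each hidden unit
    double gy = 2.0 * (out - target) * d_activation(sy);
    double gc = gy * w[4] * d_activation(sc);
    double gd = gy * w[5] * d_activation(sd);
    gradient[0] = gc * in1;
    gradient[2] = gc * in2;
    gradient[6] = gc;
    gradient[1] = gd * in1;
    gradient[3] = gd * in2;
    gradient[7] = gd;
    gradient[4] = gy * c;
    gradient[5] = gy * d;
    gradient[8] = gy;
}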
/* Hand-derived gradient for the ReLU variant of activation(). Like the
   backprop sketch above, it is not called; compute_gradient() below uses
   the numerical estimate. Note it never writes the bias entries
   gradient[6..8]. */
void compute_ReLU_gradient( double* gradient, double target)
{
    gradient[0] = (w[0] > 0 ? in1*w[4] : 0);
    gradient[1] = (w[1] > 0 ? in1*w[4] : 0);
    gradient[2] = (w[2] > 0 ? in2*w[5] : 0);
    gradient[3] = (w[3] > 0 ? in2*w[5] : 0);
    gradient[4] = (w[4] > 0 ? in1*w[0] + in1*w[1] : 0);
    gradient[5] = (w[5] > 0 ? in2*w[2] + in2*w[3] : 0);
    if( y() - target > 0)
        scale( gradient, gradient, -1.0);
}
void compute_numerical_gradient( double* gradient, double target)
{
    double temp[N];
    double temp2[N];
    double prev_loss;
    double d;
    int i;
    prev_loss = compute_loss( target);
    //printf(">>>%f\n", prev_loss);
    d = 0.00000001;
    for( i = 0; i < N; i++)
    {
        copy( temp2, w); // save the current weights
        copy( temp, w);
        temp[i] += d; // nudge one weight by d
        copy( w, temp);
        gradient[i] = (compute_loss( target) - prev_loss) / d; // forward difference
        copy( w, temp2); // restore the weights
    }
}
void compute_gradient( double* gradient, double target)
{
    compute_numerical_gradient( gradient, target);
}
void train( int gate)
{
    double gradient[N];
    double learning_rate;
    int i;
    learning_rate = 0.00000001;
    for( i = 0; i < train_loops; i++)
    {
        // draw a random input pair and compute the gate's target output
        int r = rand();
        int a = r % 2;
        int b = ( r >> 1) % 2;
        int c = 0;
        switch( gate)
        {
            case 0: c = (a&b); break;             // AND
            case 1: c = (a|b); break;             // OR
            case 2: c = (a^b); break;             // XOR
            case 3: c = (~(a&b) & 0x0001); break; // NAND
            case 4: c = (~(a|b) & 0x0001); break; // NOR
            case 5: c = (~(a^b) & 0x0001); break; // XNOR
            default: c = (a&b); break;
        }
        in1 = a;
        in2 = b;
        double target = c;
        // step against the gradient; note that normalize() after scale()
        // rescales the step to unit length, so learning_rate has no effect
        // on the update size here
        compute_gradient( gradient, target);
        scale( gradient, gradient, learning_rate);
        normalize( gradient, gradient);
        subtract( w, w, gradient);
    }
}
void test_logic()
{
    int i;
    for( i = 0; i < 4; ++i) // all four input combinations
    {
        int a = i % 2;
        int b = ( i >> 1) % 2;
        in1 = a;
        in2 = b;
        int c = 0;
        if( y() > 0.5) // threshold the output at 0.5
        {
            c = 1;
        }
        printf( "%d %d: %d ( Y = %f) \n", a, b, c, y());
    }
}
int main( int argc, char** argv)
{
    srand(time(NULL));
    printf( "\n");
    init_network();
    test_logic();
    printf( "untrained test complete. \n\n\n");
    int i;
    for( i = 0; i < 6; i++)
    {
        init_network();
        print(w);
        train( i);
        switch( i)
        {
            case 0: printf( "trained AND gate. \n"); break;
            case 1: printf( "trained OR gate. \n"); break;
            case 2: printf( "trained XOR gate. \n"); break;
            case 3: printf( "trained NAND gate. \n"); break;
            case 4: printf( "trained NOR gate. \n"); break;
            case 5: printf( "trained XNOR gate. \n"); break;
            default: printf( "trained 0 gate. \n"); break;
        }
        print(w);
        test_logic();
        printf( "test complete. \n\n");
    }
    return 0;
}
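/*
 * Build and run (assuming the file is saved as gates.c; -lm links the math
 * library needed for tanh and sqrt):
 *
 *   cc gates.c -o gates -lm
 *   ./gates
 */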