Created
August 31, 2020 21:08
-
-
Save umbra-scientia/153e94f218ad2b6a8616c219cbaef804 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <unistd.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdio.h> | |
#include <math.h> | |
// Returns the difference of two rand() draws divided by RAND_MAX.
// Because each draw lies in [0, RAND_MAX], the result lies in [-1, 1].
float RandomFloat() {
    const int difference = rand() - rand();
    const float span = static_cast<float>(RAND_MAX);
    return static_cast<float>(difference) / span;
}
template<int dim> | |
struct Vector { | |
float elem[dim]; | |
Vector() { memset(elem, 0, sizeof(elem)); } | |
Vector(float v) { | |
for(int i=0;i<dim;i++) elem[i] = v; | |
} | |
float& operator[](int x) { return elem[x]; } | |
void Initialize() { | |
for(int i=0;i<dim;i++) elem[i] = RandomFloat(); | |
} | |
void Normalize(float len = 1.0) { | |
float sqsum = 0.0; | |
for(int i=0;i<dim;i++) sqsum += elem[i] * elem[i]; | |
float scale = len / sqrt(sqsum); | |
for(int i=0;i<dim;i++) elem[i] *= scale; | |
} | |
void operator+=(const Vector& other) {for(int i=0;i<dim;i++) elem[i] += other.elem[i];} | |
void operator-=(const Vector& other) {for(int i=0;i<dim;i++) elem[i] -= other.elem[i];} | |
void operator*=(const Vector& other) {for(int i=0;i<dim;i++) elem[i] *= other.elem[i];} | |
void operator/=(const Vector& other) {for(int i=0;i<dim;i++) elem[i] /= other.elem[i];} | |
void operator&=(const Vector& other) {for(int i=0;i<dim;i++) elem[i] = fmin(elem[i], other.elem[i]);} | |
void operator|=(const Vector& other) {for(int i=0;i<dim;i++) elem[i] = fmax(elem[i], other.elem[i]);} | |
Vector operator+(Vector other) const { other += *this; return other; } | |
Vector operator-(Vector other) const { other -= *this; return other; } | |
Vector operator*(Vector other) const { other *= *this; return other; } | |
Vector operator/(Vector other) const { other /= *this; return other; } | |
Vector operator&(Vector other) const { other &= *this; return other; } | |
Vector operator|(Vector other) const { other |= *this; return other; } | |
Vector relu(float leak = 0.0) const { | |
Vector out; | |
for(int i=0;i<dim;i++) out.elem[i] = fmax(elem[i], elem[i]*leak); | |
return out; | |
} | |
Vector clip(float cap = 2.0) const { | |
Vector out; | |
for(int i=0;i<dim;i++) out.elem[i] = fmax(fmin(elem[i], cap), -cap); | |
return out; | |
} | |
void Print() const { | |
printf("["); | |
for(int i=0;i<dim;i++) { | |
if (i) printf(" "); | |
printf("%+f", elem[i]); | |
} | |
printf("]\n"); | |
} | |
}; | |
template<int out_dim, int in_dim> struct Layer { | |
virtual void Initialize() = 0; | |
virtual void Transform(Vector<out_dim>& out, const Vector<in_dim>& in) = 0; | |
virtual void ClearGrad() = 0; | |
virtual Vector<in_dim> AccumGrad(const Vector<out_dim>& gradient_in, const Vector<in_dim>& value_in) = 0; | |
virtual void ApplyGrad(float learn_rate) = 0; | |
}; | |
/// Affine layer: out = row[in_dim] + sum over i of row[i] * in[i].
///
/// row[i] for i < in_dim holds the weights that input i contributes to each
/// output element; row[in_dim] is the bias. grad mirrors that layout.
template<int out_dim, int in_dim> struct Matrix : public Layer<out_dim, in_dim> {
    Vector<out_dim> row[in_dim+1];
    Vector<out_dim> grad[in_dim+1];

    /// Returns row `x` (0..in_dim, where in_dim is the bias row). Unchecked.
    Vector<out_dim>& operator[](int x) { return row[x]; }

    /// Fills every row with random values and rescales each row to length
    /// 1 / sqrt(in_dim + 1).
    void Initialize() override {
        const float scale = 1.0 / sqrt(in_dim + 1.0);
        for (int i = 0; i < in_dim + 1; i++) {
            row[i].Initialize();
            row[i].Normalize(scale);
        }
    }

    /// Sets the weights to an identity mapping with zero bias: row[i][i] = 1
    /// for i < min(out_dim, in_dim), everything else 0.
    void Identity() {
        for (int i = 0; i < in_dim + 1; i++) row[i] = Vector<out_dim>();
        // Stop at min(out_dim, in_dim) - 1: index min(...) itself is past the
        // end of a row's elements (or would touch the bias row).
        const int diagonal = out_dim < in_dim ? out_dim : in_dim;
        for (int i = 0; i < diagonal; i++) {
            row[i][i] = 1.0f;
        }
    }

    /// Writes bias + weighted sum of the inputs into `out`.
    void Transform(Vector<out_dim>& out, const Vector<in_dim>& in) override {
        out = row[in_dim];
        for (int i = 0; i < in_dim; i++) {
            out += row[i] * in.elem[i];
        }
    }

    /// Zeroes the accumulated gradient.
    void ClearGrad() override {
        for (int i = 0; i < in_dim + 1; i++) grad[i] = Vector<out_dim>();
    }

    /// Adds this sample's contribution to grad and back-propagates.
    /// @param gradient_in gradient with respect to the layer output
    /// @param value_in    the input that produced that output
    /// @return gradient with respect to the layer input
    Vector<in_dim> AccumGrad(const Vector<out_dim>& gradient_in, const Vector<in_dim>& value_in) override {
        Vector<in_dim> gradient_out;
        grad[in_dim] += gradient_in;  // bias row sees an implicit input of 1
        for (int j = 0; j < in_dim; j++) {
            grad[j] += gradient_in * value_in.elem[j];
            // Input j reaches output i through row[j][i], so that is the
            // weight its gradient flows back through.
            for (int i = 0; i < out_dim; i++) {
                gradient_out[j] += row[j][i] * gradient_in.elem[i];
            }
        }
        return gradient_out;
    }

    /// Adds grad * learn_rate to every row (bias included).
    void ApplyGrad(float learn_rate) override {
        for (int i = 0; i < in_dim + 1; i++) {
            row[i] += grad[i] * learn_rate;
        }
    }

    /// Rescales the whole gradient by scale / max(threshold, largest |grad|).
    /// With the defaults this leaves gradients whose magnitudes are all <= 1
    /// untouched and otherwise shrinks them so the largest magnitude is 1.
    void ClipGrad(float scale = 1.0, float threshold = 1.0) {
        for (int i = 0; i < in_dim + 1; i++) {
            for (int j = 0; j < out_dim; j++) {
                const float magnitude = fabs(grad[i][j]);
                if (magnitude > threshold) {
                    threshold = magnitude;
                }
            }
        }
        if (threshold <= 0) return;  // nothing to scale by
        const float factor = scale / threshold;
        for (int i = 0; i < in_dim + 1; i++) {
            for (int j = 0; j < out_dim; j++) {
                grad[i][j] *= factor;
            }
        }
    }

    /// Weight decay: shrinks every row (bias included) by the factor
    /// lr * decay of its current value.
    void Regularize(float lr, float decay = 1e-4f) {
        for (int i = 0; i < in_dim + 1; i++) {
            row[i] -= row[i] * lr * decay;
        }
    }
};
/// Parameter-free rectifier layer: out[i] = in[i] if in[i] > 0, else 0.
template<int dim> struct ReLU : public Layer<dim, dim> {
    /// No parameters to initialize.
    void Initialize() override {}

    /// Copies the strictly positive inputs to `out` and zeroes the rest.
    void Transform(Vector<dim>& out, const Vector<dim>& in) override {
        // elem is read directly so that const inputs can be accessed.
        for (int i = 0; i < dim; i++) {
            const float value = in.elem[i];
            out.elem[i] = value > 0.0f ? value : 0.0f;
        }
    }

    /// No gradient state to clear.
    void ClearGrad() override {}

    /// Passes the gradient through where the input was strictly positive,
    /// i.e. exactly where Transform let the value through.
    /// @param gradient_in gradient with respect to the layer output
    /// @param value_in    the input that was fed to Transform
    /// @return gradient with respect to the layer input
    Vector<dim> AccumGrad(const Vector<dim>& gradient_in, const Vector<dim>& value_in) override {
        Vector<dim> gradient_out;
        for (int i = 0; i < dim; i++) {
            if (value_in.elem[i] > 0.0f) {
                gradient_out.elem[i] = gradient_in.elem[i];
            } else {
                gradient_out.elem[i] = 0.0f;
            }
        }
        return gradient_out;
    }

    /// No parameters to update.
    void ApplyGrad(float /*learn_rate*/) override {}
};
template<int N> void PrintVecs(const Vector<N> vec[], int n) { | |
for(int i=0;i<n;i++) vec[i].Print(); | |
} | |
int main() { | |
float lr = 0.05; | |
Matrix<5,5> test1, test2; | |
test1.Initialize(); | |
test2.Initialize(); | |
test1.Identity(); | |
for(int i=0;i<9001;i++) { | |
Vector<5> in1, in2, out; | |
in1.Initialize(); | |
test1.Regularize(lr); | |
test2.Regularize(lr); | |
test1.Transform(in2, in1); | |
test2.Transform(out, in2); | |
printf("in1=");in1.Print(); | |
printf("out=");out.Print(); | |
test1.ClearGrad(); | |
test2.ClearGrad(); | |
auto grad2 = out - in1; | |
auto grad1 = test2.AccumGrad(grad2, in2); | |
test2.ClipGrad(); | |
auto grad0 = test1.AccumGrad(grad1, in1); | |
test1.ClipGrad(); | |
printf("test1.grad (%d)\n", i); | |
PrintVecs(test1.grad, 6); | |
printf("test2.grad (%d)\n", i); | |
PrintVecs(test2.grad, 6); | |
test1.ApplyGrad(lr); | |
test2.ApplyGrad(lr); | |
printf("test1.row (%d)\n", i); | |
PrintVecs(test1.row, 6); | |
printf("test2.row (%d)\n", i); | |
PrintVecs(test2.row, 6); | |
usleep(100000); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment