#ifndef __NN_H
#define __NN_H

/* Data structures.
 * Nets are not fully dynamic, but flexible enough to support an
 * arbitrary number of layers, with an arbitrary number of units per
 * layer. Only fully connected feed-forward networks are supported. */
struct AnnLayer {
    int units;
    double *output;     /* output[i]: output of the i-th unit */
    double *error;      /* error[i]: output error of the i-th unit */
    double *weight;     /* weight[(i*N)+j]: weight between the i-th unit
                         * of this layer and the j-th unit of the layer
                         * it feeds into, where N is that layer's unit
                         * count (see the WEIGHT macro) */
    double *gradient;   /* gradient[(i*N)+j]: per-weight gradient */
    double *pgradient;  /* pgradient[(i*N)+j]: gradient at t-1
                         * (t-1 sgradient for resilient BP) */
    double *delta;      /* delta[(i*N)+j]: cumulative update
                         * (per-weight delta for RPROP) */
    double *sgradient;  /* gradient for the full training set,
                         * only used for RPROP */
};
/* Feed forward network structure. Layers are stored in reverse order:
 * layer[0] is the output layer, layer[layers-1] is the input layer. */
struct Ann {
    int flags;
    int layers;
    double learn_rate;  /* for the gradient descent variants; see the
                         * LEARN_RATE() and MOMENTUM() macros below */
    double momentum;
    double rprop_nminus;
    double rprop_nplus;
    double rprop_maxupdate;
    double rprop_minupdate;
    struct AnnLayer *layer;
};
/* Raw interface to data structures */
#define OUTPUT(net,l,i) (net)->layer[l].output[i]
#define ERROR(net,l,i) (net)->layer[l].error[i]
#define WEIGHT(net,l,i,j) (net)->layer[l].weight[((i)*(net)->layer[l-1].units)+(j)]
#define GRADIENT(net,l,i,j) (net)->layer[l].gradient[((i)*(net)->layer[l-1].units)+(j)]
#define SGRADIENT(net,l,i,j) (net)->layer[l].sgradient[((i)*(net)->layer[l-1].units)+(j)]
#define PGRADIENT(net,l,i,j) (net)->layer[l].pgradient[((i)*(net)->layer[l-1].units)+(j)]
#define DELTA(net,l,i,j) (net)->layer[l].delta[((i)*(net)->layer[l-1].units)+(j)]
#define LAYERS(net) (net)->layers
#define UNITS(net,l) (net)->layer[l].units
#define WEIGHTS(net,l) (UNITS(net,l)*UNITS(net,l-1))
#define LEARN_RATE(net) (net)->learn_rate
#define MOMENTUM(net) (net)->momentum
#define OUTPUT_NODE(net,i) OUTPUT(net,0,i)
#define INPUT_NODE(net,i) OUTPUT(net,((net)->layers)-1,i)
#define OUTPUT_UNITS(net) UNITS(net,0)
/* The input layer has a bias unit when the net has hidden layers, so
 * in that case one unit is subtracted from the count. */
#define INPUT_UNITS(net) (UNITS(net,((net)->layers)-1)-(LAYERS(net)>2))
#define RPROP_NMINUS(net) (net)->rprop_nminus
#define RPROP_NPLUS(net) (net)->rprop_nplus
#define RPROP_MAXUPDATE(net) (net)->rprop_maxupdate
#define RPROP_MINUPDATE(net) (net)->rprop_minupdate
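
/* Indexing example (illustrative, not part of the original header):
 * layer l owns the weights connecting its units to the units of layer
 * l-1, the adjacent layer closer to the output, and the row stride is
 * the unit count of layer l-1. So a loop visiting every weight of a
 * net looks like:
 *
 *   for (l = 1; l < LAYERS(net); l++)
 *       for (i = 0; i < UNITS(net,l); i++)
 *           for (j = 0; j < UNITS(net,l-1); j++)
 *               ... WEIGHT(net,l,i,j) ...
 */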
/* Constants */
#define DEFAULT_RPROP_NMINUS 0.5
#define DEFAULT_RPROP_NPLUS 1.2
#define DEFAULT_RPROP_MAXUPDATE 50
#define DEFAULT_RPROP_MINUPDATE 0.000001
#define RPROP_INITIAL_DELTA 0.1

/* Misc */
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))

/* Prototypes */
void AnnResetLayer(struct AnnLayer *layer);
struct Ann *AnnAlloc(int layers);
void AnnFreeLayer(struct AnnLayer *layer);
void AnnFree(struct Ann *net);
int AnnInitLayer(struct Ann *net, int i, int units, int bias);
struct Ann *AnnCreateNet(int layers, int *units);
struct Ann *AnnCreateNet2(int iunits, int ounits);
struct Ann *AnnCreateNet3(int iunits, int hunits, int ounits);
struct Ann *AnnClone(struct Ann* net);
void AnnSimulate(struct Ann *net);
void Ann2Tcl(struct Ann *net);
void AnnPrint(struct Ann *net);
double AnnGlobalError(struct Ann *net, double *desidered);
void AnnSetInput(struct Ann *net, double *input);
double AnnSimulateError(struct Ann *net, double *input, double *desidered);
void AnnCalculateGradientsTrivial(struct Ann *net, double *desidered);
void AnnCalculateGradients(struct Ann *net, double *desidered);
void AnnSetDeltas(struct Ann *net, double val);
void AnnResetDeltas(struct Ann *net);
void AnnResetSgradient(struct Ann *net);
void AnnSetRandomWeights(struct Ann *net);
void AnnScaleWeights(struct Ann *net, double factor);
void AnnUpdateDeltasGD(struct Ann *net);
void AnnUpdateDeltasGDM(struct Ann *net);
void AnnUpdateSgradient(struct Ann *net);
void AnnAdjustWeights(struct Ann *net);
double AnnBatchGDEpoch(struct Ann *net, double *input, double *desidered, int setlen);
double AnnBatchGDMEpoch(struct Ann *net, double *input, double *desidered, int setlen);
void AnnAdjustWeightsResilientBP(struct Ann *net);
double AnnResilientBPEpoch(struct Ann *net, double *input, double *desidered, int setlen);
int AnnTrain(struct Ann *net, double *input, double *desidered, double maxerr, int maxepochs, int setlen);

#endif /* __NN_H */
/* RPROP Neural Networks implementation
 * See: http://deeplearning.cs.cmu.edu/pdfs/Rprop.pdf
 *
 * Copyright(C) 2003-2016 Salvatore Sanfilippo
 * Licensed under the BSD 3-clause license. */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>

#include "nn.h"

/* Node transfer function */
double sigmoid(double x) {
    return (double)1/(1+exp(-x));
}
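
/* Note: the sigmoid derivative has the handy closed form
 * s'(x) = s(x)*(1-s(x)), which is why the backpropagation code below
 * can compute a unit's local delta as error*output*(1-output), using
 * only values already stored in the layer. For instance, at x = 0:
 * s(0) = 0.5 and s'(0) = 0.5*(1-0.5) = 0.25. */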
/* Reset layer data to zero units */
void AnnResetLayer(struct AnnLayer *layer) {
    layer->units = 0;
    layer->output = NULL;
    layer->error = NULL;
    layer->weight = NULL;
    layer->gradient = NULL;
    layer->pgradient = NULL;
    layer->delta = NULL;
    layer->sgradient = NULL;
}
/* Allocate and return an initialized N-layer network */
struct Ann *AnnAlloc(int layers) {
    struct Ann *net;
    int i;

    /* Alloc the net structure */
    if ((net = malloc(sizeof(*net))) == NULL)
        return NULL;
    /* Alloc layers */
    if ((net->layer = malloc(sizeof(struct AnnLayer)*layers)) == NULL) {
        free(net);
        return NULL;
    }
    net->layers = layers;
    net->flags = 0;
    net->learn_rate = 0; /* only used by the gradient descent variants */
    net->momentum = 0;
    net->rprop_nminus = DEFAULT_RPROP_NMINUS;
    net->rprop_nplus = DEFAULT_RPROP_NPLUS;
    net->rprop_maxupdate = DEFAULT_RPROP_MAXUPDATE;
    net->rprop_minupdate = DEFAULT_RPROP_MINUPDATE;
    /* Init layers */
    for (i = 0; i < layers; i++)
        AnnResetLayer(&net->layer[i]);
    return net;
}
/* Free a single layer */
void AnnFreeLayer(struct AnnLayer *layer)
{
    free(layer->output);
    free(layer->error);
    free(layer->weight);
    free(layer->gradient);
    free(layer->pgradient);
    free(layer->delta);
    free(layer->sgradient);
    AnnResetLayer(layer);
}

/* Free the target net */
void AnnFree(struct Ann *net)
{
    int i;

    /* Free layer data */
    for (i = 0; i < net->layers; i++) AnnFreeLayer(&net->layer[i]);
    /* Free the allocated layer structures */
    free(net->layer);
    /* And the main structure itself */
    free(net);
}
/* Init a layer of the net with the specified number of units.
 * Returns non-zero on out of memory. */
int AnnInitLayer(struct Ann *net, int i, int units, int bias) {
    if (bias) units++; /* Account for the bias unit */
    net->layer[i].output = malloc(sizeof(double)*units);
    net->layer[i].error = malloc(sizeof(double)*units);
    if (i) { /* the output layer (i == 0) stores no weight arrays */
        net->layer[i].weight =
            malloc(sizeof(double)*units*net->layer[i-1].units);
        net->layer[i].gradient =
            malloc(sizeof(double)*units*net->layer[i-1].units);
        net->layer[i].pgradient =
            malloc(sizeof(double)*units*net->layer[i-1].units);
        net->layer[i].delta =
            malloc(sizeof(double)*units*net->layer[i-1].units);
        net->layer[i].sgradient =
            malloc(sizeof(double)*units*net->layer[i-1].units);
    }
    net->layer[i].units = units;
    /* Check for out of memory conditions */
    if (net->layer[i].output == NULL ||
        net->layer[i].error == NULL ||
        (i && net->layer[i].weight == NULL) ||
        (i && net->layer[i].gradient == NULL) ||
        (i && net->layer[i].pgradient == NULL) ||
        (i && net->layer[i].sgradient == NULL) ||
        (i && net->layer[i].delta == NULL))
    {
        AnnFreeLayer(&net->layer[i]); /* also resets the layer */
        return 1;
    }
    /* Set all the values to zero */
    memset(net->layer[i].output, 0, sizeof(double)*units);
    memset(net->layer[i].error, 0, sizeof(double)*units);
    if (i) {
        memset(net->layer[i].weight, 0,
            sizeof(double)*units*net->layer[i-1].units);
        memset(net->layer[i].gradient, 0,
            sizeof(double)*units*net->layer[i-1].units);
        memset(net->layer[i].pgradient, 0,
            sizeof(double)*units*net->layer[i-1].units);
        memset(net->layer[i].delta, 0,
            sizeof(double)*units*net->layer[i-1].units);
        memset(net->layer[i].sgradient, 0,
            sizeof(double)*units*net->layer[i-1].units);
    }
    /* Set the bias unit output to 1 */
    if (bias) net->layer[i].output[units-1] = 1;
    return 0;
}
/* Clone a network. On out of memory NULL is returned. */
struct Ann *AnnClone(struct Ann* net) {
    struct Ann* copy;
    int j;

    if ((copy = AnnAlloc(LAYERS(net))) == NULL) return NULL;
    for (j = 0; j < LAYERS(net); j++) {
        struct AnnLayer *ldst, *lsrc;
        int units = UNITS(net,j);
        /* Layer 0 has no weight arrays, and WEIGHTS(net,0) would read
         * layer[-1], so only compute the count for j > 0. */
        int weights = j ? WEIGHTS(net,j) : 0;

        /* bias = 0: UNITS() already includes any bias unit. */
        if (AnnInitLayer(copy, j, UNITS(net,j), 0)) {
            AnnFree(copy);
            return NULL;
        }
        lsrc = &net->layer[j];
        ldst = &copy->layer[j];
        if (lsrc->output)
            memcpy(ldst->output, lsrc->output, sizeof(double)*units);
        if (lsrc->error)
            memcpy(ldst->error, lsrc->error, sizeof(double)*units);
        if (lsrc->weight)
            memcpy(ldst->weight, lsrc->weight, sizeof(double)*weights);
        if (lsrc->gradient)
            memcpy(ldst->gradient, lsrc->gradient, sizeof(double)*weights);
        if (lsrc->pgradient)
            memcpy(ldst->pgradient, lsrc->pgradient, sizeof(double)*weights);
        if (lsrc->delta)
            memcpy(ldst->delta, lsrc->delta, sizeof(double)*weights);
        if (lsrc->sgradient)
            memcpy(ldst->sgradient, lsrc->sgradient, sizeof(double)*weights);
    }
    copy->learn_rate = net->learn_rate;
    copy->momentum = net->momentum;
    copy->rprop_nminus = net->rprop_nminus;
    copy->rprop_nplus = net->rprop_nplus;
    copy->rprop_maxupdate = net->rprop_maxupdate;
    copy->rprop_minupdate = net->rprop_minupdate;
    copy->flags = net->flags;
    return copy;
}
/* Create an N-layer net. The units array specifies the number of
 * units in every layer, from the output layer to the input layer. */
struct Ann *AnnCreateNet(int layers, int *units) {
    struct Ann *net;
    int i;

    if ((net = AnnAlloc(layers)) == NULL) return NULL;
    for (i = 0; i < layers; i++) {
        /* A bias unit is added to every layer with index > 1. */
        if (AnnInitLayer(net, i, units[i], i > 1)) {
            AnnFree(net);
            return NULL;
        }
    }
    AnnSetRandomWeights(net);
    AnnSetDeltas(net, RPROP_INITIAL_DELTA);
    return net;
}

/* Create a 3-layer input/hidden/output net */
struct Ann *AnnCreateNet3(int iunits, int hunits, int ounits) {
    int units[3];

    units[0] = ounits;
    units[1] = hunits;
    units[2] = iunits;
    return AnnCreateNet(3, units);
}

/* Create a 2-layer "linear" network */
struct Ann *AnnCreateNet2(int iunits, int ounits) {
    int units[2];

    units[0] = ounits;
    units[1] = iunits;
    return AnnCreateNet(2, units);
}
/* Simulate the net: perform one forward pass. Since layers are stored
 * from output (index 0) to input (index layers-1), the loop walks from
 * the input layer down towards the output layer. */
void AnnSimulate(struct Ann *net) {
    int i, j, k;

    for (i = net->layers-1; i > 0; i--) {
        int nextunits = net->layer[i-1].units;
        int units = net->layer[i].units;

        if (i > 2) nextunits--; /* don't write into bias units */
        for (j = 0; j < nextunits; j++) {
            double A = 0, W;

            for (k = 0; k < units; k++) {
                W = WEIGHT(net, i, k, j);
                A += W*OUTPUT(net, i, k);
            }
            OUTPUT(net, i-1, j) = sigmoid(A);
        }
    }
}
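
/* Illustrative usage sketch (not part of the original gist): set the
 * inputs, run a forward pass, and read the outputs through the
 * accessor macros. Assumes a net created with one of the
 * AnnCreateNet*() functions. */
void AnnForwardExample(struct Ann *net, double *in) {
    int i;

    AnnSetInput(net, in); /* copy in[0..INPUT_UNITS-1] into the input layer */
    AnnSimulate(net);     /* propagate towards layer 0, the output layer */
    for (i = 0; i < OUTPUT_UNITS(net); i++)
        printf("output[%d] = %f\n", i, OUTPUT_NODE(net,i));
}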
/* Emit (on standard output) a Tcl procedure that simulates the
 * neural network */
void Ann2Tcl(struct Ann *net) {
    int i, j, k;

    printf("proc ann input {\n");
    printf(" set output {");
    for (i = 0; i < OUTPUT_UNITS(net); i++) {
        printf("0 ");
    }
    printf("}\n");
    for (i = net->layers-1; i > 0; i--) {
        int nextunits = net->layer[i-1].units;
        int units = net->layer[i].units;

        if (i > 2) nextunits--; /* don't write into bias units */
        for (j = 0; j < nextunits; j++) {
            double W;

            if (i == 1) {
                printf(" lset output %d ", j);
            } else {
                printf(" set O_%d_%d", i-1, j);
            }
            printf(" [expr { \\\n");
            for (k = 0; k < units; k++) {
                W = WEIGHT(net, i, k, j);
                if (i > 1 && k == units-1) {
                    printf(" (%.9f)", W); /* bias term */
                } else if (i == net->layers-1) {
                    printf(" (%.9f*[lindex $input %d])", W, k);
                } else {
                    printf(" (%.9f*$O_%d_%d)", W, i, k);
                }
                if ((k+1) < units) printf("+ \\\n");
            }
            printf("}]\n");
            if (i == 1) {
                printf(" lset output %d [expr {1/(1+exp(-[lindex $output %d]))}]\n", j, j);
            } else {
                printf(" lset O_%d_%d [expr {1/(1+exp(-$O_%d_%d))}]\n", i-1, j, i-1, j);
            }
        }
    }
    printf(" return $output\n");
    printf("}\n");
}
/* Print a network representation */
void AnnPrint(struct Ann *net) {
    int i, j, k;

    for (i = 0; i < LAYERS(net); i++) {
        if (i) {
            /* Weights */
            printf("\t\tW");
            for (j = 0; j < UNITS(net, i); j++) {
                printf("(");
                for (k = 0; k < UNITS(net, i-1); k++) {
                    printf("%f", WEIGHT(net,i,j,k));
                    if (k != UNITS(net, i-1)-1)
                        printf(" ");
                }
                printf(") ");
            }
            printf("\n");
            /* Gradients */
            printf("\t\tg");
            for (j = 0; j < UNITS(net, i); j++) {
                printf("[");
                for (k = 0; k < UNITS(net, i-1); k++) {
                    printf("%f", GRADIENT(net,i,j,k));
                    if (k != UNITS(net, i-1)-1)
                        printf(" ");
                }
                printf("] ");
            }
            printf("\n");
            /* SGradients */
            printf("\t\tG");
            for (j = 0; j < UNITS(net, i); j++) {
                printf("[");
                for (k = 0; k < UNITS(net, i-1); k++) {
                    printf("%f", SGRADIENT(net,i,j,k));
                    if (k != UNITS(net, i-1)-1)
                        printf(" ");
                }
                printf("] ");
            }
            printf("\n");
            /* Gradients at t-1 */
            printf("\t\tM");
            for (j = 0; j < UNITS(net, i); j++) {
                printf("[");
                for (k = 0; k < UNITS(net, i-1); k++) {
                    printf("%f", PGRADIENT(net,i,j,k));
                    if (k != UNITS(net, i-1)-1)
                        printf(" ");
                }
                printf("] ");
            }
            printf("\n");
            /* Delta */
            printf("\t\tD");
            for (j = 0; j < UNITS(net, i); j++) {
                printf("|");
                for (k = 0; k < UNITS(net, i-1); k++) {
                    printf("%f", DELTA(net,i,j,k));
                    if (k != UNITS(net, i-1)-1)
                        printf(" ");
                }
                printf("| ");
            }
            printf("\n");
        }
        for (j = 0; j < UNITS(net,i); j++) {
            printf("%f ", OUTPUT(net,i,j));
        }
        printf("\n");
        printf("\t\t/");
        for (j = 0; j < UNITS(net,i); j++) {
            printf("%f ", ERROR(net,i,j));
        }
        printf("/\n");
    }
}
/* Calculate the global error of the net: half the sum of the squared
 * output errors (note this is a sum-of-squares error, not a true
 * root mean square). */
double AnnGlobalError(struct Ann *net, double *desidered) {
    double e, t;
    int i, outputs = OUTPUT_UNITS(net);

    e = 0;
    for (i = 0; i < outputs; i++) {
        t = desidered[i] - OUTPUT_NODE(net,i);
        e += t*t; /* No need for fabs(t): t*t is always non-negative. */
    }
    return .5*e;
}
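
/* Worked example (illustrative numbers): with desired outputs {1, 0}
 * and actual outputs {0.8, 0.3}, the global error is
 * 0.5*((1-0.8)^2 + (0-0.3)^2) = 0.5*(0.04+0.09) = 0.065. */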
/* Set the network input */
void AnnSetInput(struct Ann *net, double *input)
{
    int i, inputs = INPUT_UNITS(net);

    for (i = 0; i < inputs; i++) INPUT_NODE(net,i) = input[i];
}

/* Simulate the net, and return the global error */
double AnnSimulateError(struct Ann *net, double *input, double *desidered) {
    AnnSetInput(net, input);
    AnnSimulate(net);
    return AnnGlobalError(net, desidered);
}
/* Calculate gradients with a trivial and slow algorithm. This is
 * useful to check that the real implementation is working correctly,
 * by comparing the results.
 *
 * The algorithm: compute the error function at two points (E1 with
 * the current weight W, and E2 with the weight W + GTRIVIAL_DELTA);
 * the gradient is then approximated as G = (E2-E1)/GTRIVIAL_DELTA. */
#define GTRIVIAL_DELTA 0.001
void AnnCalculateGradientsTrivial(struct Ann *net, double *desidered) {
    int j, i, layers = LAYERS(net);

    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);

        for (i = 0; i < weights; i++) {
            double t, e1, e2;

            /* Calculate the value of the error function
             * at this point. */
            AnnSimulate(net);
            e1 = AnnGlobalError(net, desidered);
            t = net->layer[j].weight[i];
            /* Calculate the error a bit on the right */
            net->layer[j].weight[i] += GTRIVIAL_DELTA;
            AnnSimulate(net);
            e2 = AnnGlobalError(net, desidered);
            /* Restore the original weight */
            net->layer[j].weight[i] = t;
            /* Calculate the gradient */
            net->layer[j].gradient[i] = (e2-e1)/GTRIVIAL_DELTA;
        }
    }
}
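
/* Illustrative gradient check (not part of the original gist):
 * compare the backpropagation gradient of one weight against the
 * finite-difference approximation for a given input/target pair.
 * The two values should agree to several decimal places. */
void AnnGradientCheckExample(struct Ann *net, double *input, double *desidered) {
    double bp, fd;

    AnnSetInput(net, input);
    AnnSimulate(net);
    AnnCalculateGradients(net, desidered);
    bp = net->layer[1].gradient[0]; /* save before it is overwritten */
    AnnCalculateGradientsTrivial(net, desidered);
    fd = net->layer[1].gradient[0];
    printf("backprop: %f finite-difference: %f\n", bp, fd);
}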
/* Calculate gradients using the back propagation algorithm */
void AnnCalculateGradients(struct Ann *net, double *desidered) {
    int j, layers = LAYERS(net)-1;

    /* First we need to calculate the error for every output
     * node. */
    for (j = 0; j < OUTPUT_UNITS(net); j++)
        net->layer[0].error[j] = net->layer[0].output[j] - desidered[j];
    /* Back-propagate the error and compute the gradient
     * for every weight in the net. */
    for (j = 0; j < layers; j++) {
        int units = UNITS(net, j);
        int i;

        /* Skip bias units: their output is constant, so no delta is
         * computed for them. */
        if (j > 1) units--;
        /* Reset the error array of layer j+1, the previous layer in
         * feed-forward order (one step closer to the input). */
        for (i = 0; i < UNITS(net,j+1); i++) net->layer[j+1].error[i] = 0;
        /* For every node in this layer ... */
        for (i = 0; i < units; i++) {
            double delta, e, o;
            int k, prevunits;

            /* Compute delta = (o-d)*o*(1-o): the error times the
             * derivative of the sigmoid. */
            e = net->layer[j].error[i];
            o = net->layer[j].output[i];
            delta = e*o*(1-o);
            /* For every weight between this node and
             * the previous layer's nodes... */
            prevunits = UNITS(net,j+1);
            for (k = 0; k < prevunits; k++) {
                /* Calculate the gradient */
                GRADIENT(net,j+1,k,i) = delta * OUTPUT(net,j+1,k);
                /* And back-propagate the error to
                 * the previous layer */
                ERROR(net,j+1,k) += delta * WEIGHT(net,j+1,k,i);
            }
        }
    }
}
/* Set the delta values of the net to the given value */
void AnnSetDeltas(struct Ann *net, double val) {
    int j, layers = LAYERS(net);

    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);
        int i;

        for (i = 0; i < weights; i++) net->layer[j].delta[i] = val;
    }
}

/* Set the sgradient values to zero */
void AnnResetSgradient(struct Ann *net) {
    int j, layers = LAYERS(net);

    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);

        memset(net->layer[j].sgradient, 0, sizeof(double)*weights);
    }
}
/* Set random weights in the range [-0.05,+0.05) */
void AnnSetRandomWeights(struct Ann *net) {
    int j, layers = LAYERS(net);

    /* NOTE: reseeding on every call means that nets created within
     * the same second start with identical weights. */
    srand(time(NULL));
    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);
        int i;

        for (i = 0; i < weights; i++)
            net->layer[j].weight[i] = -0.05+.1*(rand()/(RAND_MAX+1.0));
    }
}
/* Scale the net weights by the given factor */
void AnnScaleWeights(struct Ann *net, double factor) {
    int j, layers = LAYERS(net);

    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);
        int i;

        for (i = 0; i < weights; i++)
            net->layer[j].weight[i] *= factor;
    }
}

/* Update the sgradient: the sum of each weight's gradient over every
 * element of the training set. The RPROP algorithm works with the
 * sign of the derivative over the whole set. */
void AnnUpdateSgradient(struct Ann *net) {
    int j, i, layers = LAYERS(net);

    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);

        for (i = 0; i < weights; i++)
            net->layer[j].sgradient[i] += net->layer[j].gradient[i];
    }
}
/* Helper function for RPROP: returns -1 if n < 0, +1 if n > 0, 0 if n == 0 */
double sign(double n) {
    if (n > 0) return +1;
    if (n < 0) return -1;
    return 0;
}

/* The core of the RPROP algorithm.
 *
 * Note that:
 * sgradient is the set-wise gradient.
 * delta is the per-weight update value. */
void AnnAdjustWeightsResilientBP(struct Ann *net) {
    int j, i, layers = LAYERS(net);

    for (j = 1; j < layers; j++) {
        int units = UNITS(net, j);
        int weights = units * UNITS(net,j-1);

        for (i = 0; i < weights; i++) {
            double t = net->layer[j].pgradient[i] *
                       net->layer[j].sgradient[i];
            double delta = net->layer[j].delta[i];

            if (t > 0) {
                /* Same gradient sign as the previous epoch: grow the
                 * step size and take the step. */
                delta = MIN(delta*RPROP_NPLUS(net),RPROP_MAXUPDATE(net));
                double wdelta = -sign(net->layer[j].sgradient[i]) * delta;
                net->layer[j].weight[i] += wdelta;
                net->layer[j].delta[i] = delta;
                net->layer[j].pgradient[i] = net->layer[j].sgradient[i];
            } else if (t < 0) {
                /* Sign change: the minimum was overshot. Revert the
                 * previous update, shrink the step size, and set
                 * pgradient to 0 so the next epoch takes a plain step
                 * without re-adapting. */
                double past_wdelta = -sign(net->layer[j].pgradient[i]) * delta;
                delta = MAX(delta*RPROP_NMINUS(net),RPROP_MINUPDATE(net));
                net->layer[j].weight[i] -= past_wdelta;
                net->layer[j].delta[i] = delta;
                net->layer[j].pgradient[i] = 0;
            } else { /* t == 0 */
                /* No previous information (first epoch or a reverted
                 * step): take a step with the current step size. */
                double wdelta = -sign(net->layer[j].sgradient[i]) * delta;
                net->layer[j].weight[i] += wdelta;
                net->layer[j].pgradient[i] = net->layer[j].sgradient[i];
            }
        }
    }
}
/* Perform one epoch of Resilient Backpropagation over the whole
 * training set, and return the maximum per-pattern error seen.
 * 'input' and 'desidered' are flattened arrays: setlen consecutive
 * groups of INPUT_UNITS() inputs and OUTPUT_UNITS() targets. */
double AnnResilientBPEpoch(struct Ann *net, double *input, double *desidered, int setlen) {
    double maxerr = 0, e;
    int j, inputs = INPUT_UNITS(net), outputs = OUTPUT_UNITS(net);

    AnnResetSgradient(net);
    for (j = 0; j < setlen; j++) {
        e = AnnSimulateError(net, input, desidered);
        if (e > maxerr) maxerr = e;
        AnnCalculateGradients(net, desidered);
        AnnUpdateSgradient(net);
        input += inputs;
        desidered += outputs;
    }
    AnnAdjustWeightsResilientBP(net);
    return maxerr;
}
/* Train the net with RPROP until the largest per-pattern error drops
 * below maxerr, or maxepochs epochs have been performed. Returns the
 * number of iterations on success, 0 if the error target was not
 * reached. */
int AnnTrain(struct Ann *net, double *input, double *desidered, double maxerr, int maxepochs, int setlen)
{
    int i = 0;
    double e = maxerr+1;

    while (i++ < maxepochs && e >= maxerr)
        e = AnnResilientBPEpoch(net, input, desidered, setlen);
    /* Test the error itself rather than the epoch counter, so that a
     * net converging exactly on the last epoch is not reported as a
     * failure. */
    if (e >= maxerr) return 0;
    return i;
}
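
/* ------------------------------------------------------------------
 * Example program (illustrative, not part of the original gist):
 * train the XOR function with a 2-input, 3-hidden, 1-output net, then
 * print the net's answers. Save as a separate file and compile it
 * together with the code above, e.g. (assuming the implementation is
 * saved as nn.c):
 *
 *   cc example.c nn.c -lm
 *
 * Convergence is not guaranteed on every run since the initial
 * weights are random; maxerr and maxepochs below are arbitrary
 * choices.
 * ------------------------------------------------------------------ */
#include <stdio.h>
#include "nn.h"

int main(void) {
    /* Flattened training set: 4 patterns of 2 inputs / 1 target. */
    double inputs[]  = {0,0, 0,1, 1,0, 1,1};
    double targets[] = {0,   1,   1,   0};
    struct Ann *net = AnnCreateNet3(2, 3, 1);
    int j, epochs;

    if (net == NULL) return 1;
    epochs = AnnTrain(net, inputs, targets, 0.01, 100000, 4);
    if (epochs == 0) {
        printf("not converged\n");
    } else {
        printf("converged in %d iterations\n", epochs);
        for (j = 0; j < 4; j++) {
            AnnSetInput(net, inputs+j*2);
            AnnSimulate(net);
            printf("%g XOR %g -> %f\n",
                inputs[j*2], inputs[j*2+1], OUTPUT_NODE(net,0));
        }
    }
    AnnFree(net);
    return 0;
}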