Created
March 6, 2016 21:15
-
-
Save alexcmd/0254bd4f80a90bc5ccc4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
quaetrix analytics | |
software research, education, and technical content development | |
using System; | |
// For 2014 Microsoft Build Conference attendees | |
// April 2-4, 2014 | |
// San Francisco, CA | |
// | |
// This is source for a C# console application. | |
// To compile you can 1.) create a new Visual Studio | |
// C# console app project named BuildNeuralNetworkDemo | |
// then zap away the template code and replace with this code, | |
// or 2.) copy this code into notepad, save as NeuralNetworkProgram.cs | |
// on your local machine, launch the special VS command shell | |
// (it knows where the csc.exe compiler is), cd-navigate to | |
// the directory containing the .cs file, type 'csc.exe | |
// NeuralNetworkProgram.cs' and hit enter, and then after | |
// the compiler creates NeuralNetworkProgram.exe, you can | |
// run from the command line. | |
// | |
// This is an enhanced neural network. It is fully-connected | |
// and feed-forward. The training algorithm is back-propagation | |
// with momentum and weight decay. The input data is normalized | |
// so training is quite fast. | |
// | |
// You can use this code however you wish subject to the usual disclaimers | |
// (use at your own risk, etc.) | |
namespace BuildNeuralNetworkDemo | |
{ | |
class NeuralNetworkProgram | |
{ | |
// Demo entry point: trains a 4-input, 7-hidden, 3-output fully-connected
// feed-forward neural network (tanh hidden activation, softmax output,
// back-propagation with momentum and weight decay) on the 150-item Iris
// flower data set, then reports classification accuracy on an 80/20
// train/test split. All hyperparameters are hard-coded for the demo.
static void Main(string[] args)
{
    Console.WriteLine("\nBegin Build 2014 neural network demo");
    Console.WriteLine("\nData is the famous Iris flower set.");
    Console.WriteLine("Data is sepal length, sepal width, petal length, petal width -> iris species");
    Console.WriteLine("Iris setosa = 0 0 1, Iris versicolor = 0 1 0, Iris virginica = 1 0 0 ");
    Console.WriteLine("The goal is to predict species from sepal length, width, petal length, width\n");
    Console.WriteLine("Raw data resembles:\n");
    Console.WriteLine(" 5.1, 3.5, 1.4, 0.2, Iris setosa");
    Console.WriteLine(" 7.0, 3.2, 4.7, 1.4, Iris versicolor");
    Console.WriteLine(" 6.3, 3.3, 6.0, 2.5, Iris virginica");
    Console.WriteLine(" ......\n");

    // Each row is: sepal length, sepal width, petal length, petal width,
    // followed by the one-hot encoded species label.
    double[][] allData = new double[150][];
    allData[0] = new double[] { 5.1, 3.5, 1.4, 0.2, 0, 0, 1 }; // sepal length, width, petal length, width
    allData[1] = new double[] { 4.9, 3.0, 1.4, 0.2, 0, 0, 1 }; // Iris setosa = 0 0 1
    allData[2] = new double[] { 4.7, 3.2, 1.3, 0.2, 0, 0, 1 }; // Iris versicolor = 0 1 0
    allData[3] = new double[] { 4.6, 3.1, 1.5, 0.2, 0, 0, 1 }; // Iris virginica = 1 0 0
    allData[4] = new double[] { 5.0, 3.6, 1.4, 0.2, 0, 0, 1 };
    allData[5] = new double[] { 5.4, 3.9, 1.7, 0.4, 0, 0, 1 };
    allData[6] = new double[] { 4.6, 3.4, 1.4, 0.3, 0, 0, 1 };
    allData[7] = new double[] { 5.0, 3.4, 1.5, 0.2, 0, 0, 1 };
    allData[8] = new double[] { 4.4, 2.9, 1.4, 0.2, 0, 0, 1 };
    allData[9] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
    allData[10] = new double[] { 5.4, 3.7, 1.5, 0.2, 0, 0, 1 };
    allData[11] = new double[] { 4.8, 3.4, 1.6, 0.2, 0, 0, 1 };
    allData[12] = new double[] { 4.8, 3.0, 1.4, 0.1, 0, 0, 1 };
    allData[13] = new double[] { 4.3, 3.0, 1.1, 0.1, 0, 0, 1 };
    allData[14] = new double[] { 5.8, 4.0, 1.2, 0.2, 0, 0, 1 };
    allData[15] = new double[] { 5.7, 4.4, 1.5, 0.4, 0, 0, 1 };
    allData[16] = new double[] { 5.4, 3.9, 1.3, 0.4, 0, 0, 1 };
    allData[17] = new double[] { 5.1, 3.5, 1.4, 0.3, 0, 0, 1 };
    allData[18] = new double[] { 5.7, 3.8, 1.7, 0.3, 0, 0, 1 };
    allData[19] = new double[] { 5.1, 3.8, 1.5, 0.3, 0, 0, 1 };
    allData[20] = new double[] { 5.4, 3.4, 1.7, 0.2, 0, 0, 1 };
    allData[21] = new double[] { 5.1, 3.7, 1.5, 0.4, 0, 0, 1 };
    allData[22] = new double[] { 4.6, 3.6, 1.0, 0.2, 0, 0, 1 };
    allData[23] = new double[] { 5.1, 3.3, 1.7, 0.5, 0, 0, 1 };
    allData[24] = new double[] { 4.8, 3.4, 1.9, 0.2, 0, 0, 1 };
    allData[25] = new double[] { 5.0, 3.0, 1.6, 0.2, 0, 0, 1 };
    allData[26] = new double[] { 5.0, 3.4, 1.6, 0.4, 0, 0, 1 };
    allData[27] = new double[] { 5.2, 3.5, 1.5, 0.2, 0, 0, 1 };
    allData[28] = new double[] { 5.2, 3.4, 1.4, 0.2, 0, 0, 1 };
    allData[29] = new double[] { 4.7, 3.2, 1.6, 0.2, 0, 0, 1 };
    allData[30] = new double[] { 4.8, 3.1, 1.6, 0.2, 0, 0, 1 };
    allData[31] = new double[] { 5.4, 3.4, 1.5, 0.4, 0, 0, 1 };
    allData[32] = new double[] { 5.2, 4.1, 1.5, 0.1, 0, 0, 1 };
    allData[33] = new double[] { 5.5, 4.2, 1.4, 0.2, 0, 0, 1 };
    allData[34] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
    allData[35] = new double[] { 5.0, 3.2, 1.2, 0.2, 0, 0, 1 };
    allData[36] = new double[] { 5.5, 3.5, 1.3, 0.2, 0, 0, 1 };
    allData[37] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
    allData[38] = new double[] { 4.4, 3.0, 1.3, 0.2, 0, 0, 1 };
    allData[39] = new double[] { 5.1, 3.4, 1.5, 0.2, 0, 0, 1 };
    allData[40] = new double[] { 5.0, 3.5, 1.3, 0.3, 0, 0, 1 };
    allData[41] = new double[] { 4.5, 2.3, 1.3, 0.3, 0, 0, 1 };
    allData[42] = new double[] { 4.4, 3.2, 1.3, 0.2, 0, 0, 1 };
    allData[43] = new double[] { 5.0, 3.5, 1.6, 0.6, 0, 0, 1 };
    allData[44] = new double[] { 5.1, 3.8, 1.9, 0.4, 0, 0, 1 };
    allData[45] = new double[] { 4.8, 3.0, 1.4, 0.3, 0, 0, 1 };
    allData[46] = new double[] { 5.1, 3.8, 1.6, 0.2, 0, 0, 1 };
    allData[47] = new double[] { 4.6, 3.2, 1.4, 0.2, 0, 0, 1 };
    allData[48] = new double[] { 5.3, 3.7, 1.5, 0.2, 0, 0, 1 };
    allData[49] = new double[] { 5.0, 3.3, 1.4, 0.2, 0, 0, 1 };
    allData[50] = new double[] { 7.0, 3.2, 4.7, 1.4, 0, 1, 0 };
    allData[51] = new double[] { 6.4, 3.2, 4.5, 1.5, 0, 1, 0 };
    allData[52] = new double[] { 6.9, 3.1, 4.9, 1.5, 0, 1, 0 };
    allData[53] = new double[] { 5.5, 2.3, 4.0, 1.3, 0, 1, 0 };
    allData[54] = new double[] { 6.5, 2.8, 4.6, 1.5, 0, 1, 0 };
    allData[55] = new double[] { 5.7, 2.8, 4.5, 1.3, 0, 1, 0 };
    allData[56] = new double[] { 6.3, 3.3, 4.7, 1.6, 0, 1, 0 };
    allData[57] = new double[] { 4.9, 2.4, 3.3, 1.0, 0, 1, 0 };
    allData[58] = new double[] { 6.6, 2.9, 4.6, 1.3, 0, 1, 0 };
    allData[59] = new double[] { 5.2, 2.7, 3.9, 1.4, 0, 1, 0 };
    allData[60] = new double[] { 5.0, 2.0, 3.5, 1.0, 0, 1, 0 };
    allData[61] = new double[] { 5.9, 3.0, 4.2, 1.5, 0, 1, 0 };
    allData[62] = new double[] { 6.0, 2.2, 4.0, 1.0, 0, 1, 0 };
    allData[63] = new double[] { 6.1, 2.9, 4.7, 1.4, 0, 1, 0 };
    allData[64] = new double[] { 5.6, 2.9, 3.6, 1.3, 0, 1, 0 };
    allData[65] = new double[] { 6.7, 3.1, 4.4, 1.4, 0, 1, 0 };
    allData[66] = new double[] { 5.6, 3.0, 4.5, 1.5, 0, 1, 0 };
    allData[67] = new double[] { 5.8, 2.7, 4.1, 1.0, 0, 1, 0 };
    allData[68] = new double[] { 6.2, 2.2, 4.5, 1.5, 0, 1, 0 };
    allData[69] = new double[] { 5.6, 2.5, 3.9, 1.1, 0, 1, 0 };
    allData[70] = new double[] { 5.9, 3.2, 4.8, 1.8, 0, 1, 0 };
    allData[71] = new double[] { 6.1, 2.8, 4.0, 1.3, 0, 1, 0 };
    allData[72] = new double[] { 6.3, 2.5, 4.9, 1.5, 0, 1, 0 };
    allData[73] = new double[] { 6.1, 2.8, 4.7, 1.2, 0, 1, 0 };
    allData[74] = new double[] { 6.4, 2.9, 4.3, 1.3, 0, 1, 0 };
    allData[75] = new double[] { 6.6, 3.0, 4.4, 1.4, 0, 1, 0 };
    allData[76] = new double[] { 6.8, 2.8, 4.8, 1.4, 0, 1, 0 };
    allData[77] = new double[] { 6.7, 3.0, 5.0, 1.7, 0, 1, 0 };
    allData[78] = new double[] { 6.0, 2.9, 4.5, 1.5, 0, 1, 0 };
    allData[79] = new double[] { 5.7, 2.6, 3.5, 1.0, 0, 1, 0 };
    allData[80] = new double[] { 5.5, 2.4, 3.8, 1.1, 0, 1, 0 };
    allData[81] = new double[] { 5.5, 2.4, 3.7, 1.0, 0, 1, 0 };
    allData[82] = new double[] { 5.8, 2.7, 3.9, 1.2, 0, 1, 0 };
    allData[83] = new double[] { 6.0, 2.7, 5.1, 1.6, 0, 1, 0 };
    allData[84] = new double[] { 5.4, 3.0, 4.5, 1.5, 0, 1, 0 };
    allData[85] = new double[] { 6.0, 3.4, 4.5, 1.6, 0, 1, 0 };
    allData[86] = new double[] { 6.7, 3.1, 4.7, 1.5, 0, 1, 0 };
    allData[87] = new double[] { 6.3, 2.3, 4.4, 1.3, 0, 1, 0 };
    allData[88] = new double[] { 5.6, 3.0, 4.1, 1.3, 0, 1, 0 };
    allData[89] = new double[] { 5.5, 2.5, 4.0, 1.3, 0, 1, 0 };
    allData[90] = new double[] { 5.5, 2.6, 4.4, 1.2, 0, 1, 0 };
    allData[91] = new double[] { 6.1, 3.0, 4.6, 1.4, 0, 1, 0 };
    allData[92] = new double[] { 5.8, 2.6, 4.0, 1.2, 0, 1, 0 };
    allData[93] = new double[] { 5.0, 2.3, 3.3, 1.0, 0, 1, 0 };
    allData[94] = new double[] { 5.6, 2.7, 4.2, 1.3, 0, 1, 0 };
    allData[95] = new double[] { 5.7, 3.0, 4.2, 1.2, 0, 1, 0 };
    allData[96] = new double[] { 5.7, 2.9, 4.2, 1.3, 0, 1, 0 };
    allData[97] = new double[] { 6.2, 2.9, 4.3, 1.3, 0, 1, 0 };
    allData[98] = new double[] { 5.1, 2.5, 3.0, 1.1, 0, 1, 0 };
    allData[99] = new double[] { 5.7, 2.8, 4.1, 1.3, 0, 1, 0 };
    allData[100] = new double[] { 6.3, 3.3, 6.0, 2.5, 1, 0, 0 };
    allData[101] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
    allData[102] = new double[] { 7.1, 3.0, 5.9, 2.1, 1, 0, 0 };
    allData[103] = new double[] { 6.3, 2.9, 5.6, 1.8, 1, 0, 0 };
    allData[104] = new double[] { 6.5, 3.0, 5.8, 2.2, 1, 0, 0 };
    allData[105] = new double[] { 7.6, 3.0, 6.6, 2.1, 1, 0, 0 };
    allData[106] = new double[] { 4.9, 2.5, 4.5, 1.7, 1, 0, 0 };
    allData[107] = new double[] { 7.3, 2.9, 6.3, 1.8, 1, 0, 0 };
    allData[108] = new double[] { 6.7, 2.5, 5.8, 1.8, 1, 0, 0 };
    allData[109] = new double[] { 7.2, 3.6, 6.1, 2.5, 1, 0, 0 };
    allData[110] = new double[] { 6.5, 3.2, 5.1, 2.0, 1, 0, 0 };
    allData[111] = new double[] { 6.4, 2.7, 5.3, 1.9, 1, 0, 0 };
    allData[112] = new double[] { 6.8, 3.0, 5.5, 2.1, 1, 0, 0 };
    allData[113] = new double[] { 5.7, 2.5, 5.0, 2.0, 1, 0, 0 };
    allData[114] = new double[] { 5.8, 2.8, 5.1, 2.4, 1, 0, 0 };
    allData[115] = new double[] { 6.4, 3.2, 5.3, 2.3, 1, 0, 0 };
    allData[116] = new double[] { 6.5, 3.0, 5.5, 1.8, 1, 0, 0 };
    allData[117] = new double[] { 7.7, 3.8, 6.7, 2.2, 1, 0, 0 };
    allData[118] = new double[] { 7.7, 2.6, 6.9, 2.3, 1, 0, 0 };
    allData[119] = new double[] { 6.0, 2.2, 5.0, 1.5, 1, 0, 0 };
    allData[120] = new double[] { 6.9, 3.2, 5.7, 2.3, 1, 0, 0 };
    allData[121] = new double[] { 5.6, 2.8, 4.9, 2.0, 1, 0, 0 };
    allData[122] = new double[] { 7.7, 2.8, 6.7, 2.0, 1, 0, 0 };
    allData[123] = new double[] { 6.3, 2.7, 4.9, 1.8, 1, 0, 0 };
    allData[124] = new double[] { 6.7, 3.3, 5.7, 2.1, 1, 0, 0 };
    allData[125] = new double[] { 7.2, 3.2, 6.0, 1.8, 1, 0, 0 };
    allData[126] = new double[] { 6.2, 2.8, 4.8, 1.8, 1, 0, 0 };
    allData[127] = new double[] { 6.1, 3.0, 4.9, 1.8, 1, 0, 0 };
    allData[128] = new double[] { 6.4, 2.8, 5.6, 2.1, 1, 0, 0 };
    allData[129] = new double[] { 7.2, 3.0, 5.8, 1.6, 1, 0, 0 };
    allData[130] = new double[] { 7.4, 2.8, 6.1, 1.9, 1, 0, 0 };
    allData[131] = new double[] { 7.9, 3.8, 6.4, 2.0, 1, 0, 0 };
    allData[132] = new double[] { 6.4, 2.8, 5.6, 2.2, 1, 0, 0 };
    allData[133] = new double[] { 6.3, 2.8, 5.1, 1.5, 1, 0, 0 };
    allData[134] = new double[] { 6.1, 2.6, 5.6, 1.4, 1, 0, 0 };
    allData[135] = new double[] { 7.7, 3.0, 6.1, 2.3, 1, 0, 0 };
    allData[136] = new double[] { 6.3, 3.4, 5.6, 2.4, 1, 0, 0 };
    allData[137] = new double[] { 6.4, 3.1, 5.5, 1.8, 1, 0, 0 };
    allData[138] = new double[] { 6.0, 3.0, 4.8, 1.8, 1, 0, 0 };
    allData[139] = new double[] { 6.9, 3.1, 5.4, 2.1, 1, 0, 0 };
    allData[140] = new double[] { 6.7, 3.1, 5.6, 2.4, 1, 0, 0 };
    allData[141] = new double[] { 6.9, 3.1, 5.1, 2.3, 1, 0, 0 };
    allData[142] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
    allData[143] = new double[] { 6.8, 3.2, 5.9, 2.3, 1, 0, 0 };
    allData[144] = new double[] { 6.7, 3.3, 5.7, 2.5, 1, 0, 0 };
    allData[145] = new double[] { 6.7, 3.0, 5.2, 2.3, 1, 0, 0 };
    allData[146] = new double[] { 6.3, 2.5, 5.0, 1.9, 1, 0, 0 };
    allData[147] = new double[] { 6.5, 3.0, 5.2, 2.0, 1, 0, 0 };
    allData[148] = new double[] { 6.2, 3.4, 5.4, 2.3, 1, 0, 0 };
    allData[149] = new double[] { 5.9, 3.0, 5.1, 1.8, 1, 0, 0 };

    Console.WriteLine("\nFirst 6 rows of entire 150-item data set:");
    ShowMatrix(allData, 6, 1, true);

    Console.WriteLine("Creating 80% training and 20% test data matrices");
    double[][] trainData = null;
    double[][] testData = null;
    MakeTrainTest(allData, out trainData, out testData);
    Console.WriteLine("\nFirst 5 rows of training data:");
    ShowMatrix(trainData, 5, 1, true);
    Console.WriteLine("First 3 rows of test data:");
    ShowMatrix(testData, 3, 1, true);

    // NOTE(review): the test data is standardized with its own statistics,
    // not the training set's — a simplification that's fine for this demo.
    Normalize(trainData, new int[] { 0, 1, 2, 3 });
    Normalize(testData, new int[] { 0, 1, 2, 3 });
    Console.WriteLine("\nFirst 5 rows of normalized training data:");
    ShowMatrix(trainData, 5, 1, true);
    Console.WriteLine("First 3 rows of normalized test data:");
    ShowMatrix(testData, 3, 1, true);

    Console.WriteLine("\nCreating a 4-input, 7-hidden, 3-output neural network");
    Console.Write("Hard-coded tanh function for input-to-hidden and softmax for ");
    Console.WriteLine("hidden-to-output activations");
    const int numInput = 4;
    const int numHidden = 7;
    const int numOutput = 3;
    NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput);

    Console.WriteLine("\nInitializing weights and bias to small random values");
    nn.InitializeWeights();

    int maxEpochs = 2000;
    double learnRate = 0.05;
    double momentum = 0.01;
    double weightDecay = 0.0001;
    Console.WriteLine("Setting maxEpochs = 2000, learnRate = 0.05, momentum = 0.01, weightDecay = 0.0001");
    Console.WriteLine("Training has hard-coded mean squared error < 0.020 stopping condition");
    Console.WriteLine("\nBeginning training using incremental back-propagation\n");
    nn.Train(trainData, maxEpochs, learnRate, momentum, weightDecay);
    Console.WriteLine("Training complete");

    double[] weights = nn.GetWeights();
    Console.WriteLine("Final neural network weights and bias values:");
    ShowVector(weights, 10, 3, true);

    double trainAcc = nn.Accuracy(trainData);
    Console.WriteLine("\nAccuracy on training data = " + trainAcc.ToString("F4"));
    double testAcc = nn.Accuracy(testData);
    Console.WriteLine("\nAccuracy on test data = " + testAcc.ToString("F4"));

    // Fixed: the closing banner previously said "Build 2013", which
    // contradicted the "Build 2014" opening banner.
    Console.WriteLine("\nEnd Build 2014 neural network demo\n");
    Console.ReadLine();
} // Main
// Randomly partitions allData into an 80% training matrix and a 20% test
// matrix (row counts are hard-coded as an 80-20 split). Rows are deep-copied,
// so mutating the output matrices never touches allData. A fixed seed (0)
// makes the partition reproducible from run to run.
static void MakeTrainTest(double[][] allData, out double[][] trainData, out double[][] testData)
{
    Random rng = new Random(0);
    int rowCount = allData.Length;
    int colCount = allData[0].Length;
    int numTrain = (int)(rowCount * 0.80); // hard-coded 80-20 split
    int numTest = rowCount - numTrain;
    trainData = new double[numTrain][];
    testData = new double[numTest][];

    // build a shuffled index sequence (Fisher-Yates)
    int[] order = new int[rowCount];
    for (int i = 0; i < order.Length; ++i)
        order[i] = i;
    for (int i = 0; i < order.Length; ++i)
    {
        int swapIdx = rng.Next(i, order.Length);
        int held = order[swapIdx];
        order[swapIdx] = order[i];
        order[i] = held;
    }

    // first numTrain shuffled rows go to training, the remainder to test
    for (int k = 0; k < rowCount; ++k)
    {
        double[] rowCopy = new double[colCount];
        Array.Copy(allData[order[k]], rowCopy, colCount);
        if (k < numTrain)
            trainData[k] = rowCopy;
        else
            testData[k - numTrain] = rowCopy;
    }
} // MakeTrainTest
// Standardizes the specified columns in place: each value becomes
// (x - mean) / sd, where sd is the sample standard deviation (n - 1 divisor).
static void Normalize(double[][] dataMatrix, int[] cols)
{
    int n = dataMatrix.Length;
    foreach (int col in cols)
    {
        double total = 0.0;
        for (int row = 0; row < n; ++row)
            total += dataMatrix[row][col];
        double mean = total / n;

        double sqDevSum = 0.0;
        for (int row = 0; row < n; ++row)
        {
            double dev = dataMatrix[row][col] - mean;
            sqDevSum += dev * dev;
        }
        // thanks to Dr. W. Winfrey, Concord Univ., for catching bug in original code
        double sd = Math.Sqrt(sqDevSum / (n - 1));

        for (int row = 0; row < n; ++row)
            dataMatrix[row][col] = (dataMatrix[row][col] - mean) / sd;
    }
}
// Prints vector to the console, valsPerRow values per line, each formatted
// to 'decimals' places and left-padded; optionally appends a final newline.
static void ShowVector(double[] vector, int valsPerRow, int decimals, bool newLine)
{
    string fmt = "F" + decimals;
    int width = decimals + 4;
    for (int i = 0; i < vector.Length; ++i)
    {
        if (i % valsPerRow == 0)
            Console.WriteLine("");
        Console.Write(vector[i].ToString(fmt).PadLeft(width) + " ");
    }
    if (newLine)
        Console.WriteLine("");
}
// Prints the first numRows rows of matrix, one row per line, each value
// formatted to 'decimals' places with an explicit sign column ("-" for
// negative, space otherwise); optionally appends a final newline.
static void ShowMatrix(double[][] matrix, int numRows, int decimals, bool newLine)
{
    string fmt = "F" + decimals;
    for (int r = 0; r < numRows; ++r)
    {
        Console.Write(r.ToString().PadLeft(3) + ": ");
        foreach (double v in matrix[r])
        {
            Console.Write(v >= 0.0 ? " " : "-");
            Console.Write(Math.Abs(v).ToString(fmt) + " ");
        }
        Console.WriteLine("");
    }
    if (newLine)
        Console.WriteLine("");
}
} // class Program | |
public class NeuralNetwork | |
{ | |
private static Random rnd; | |
private int numInput; | |
private int numHidden; | |
private int numOutput; | |
private double[] inputs; | |
private double[][] ihWeights; // input-hidden | |
private double[] hBiases; | |
private double[] hOutputs; | |
private double[][] hoWeights; // hidden-output | |
private double[] oBiases; | |
private double[] outputs; | |
// back-prop specific arrays (these could be local to method UpdateWeights) | |
private double[] oGrads; // output gradients for back-propagation | |
private double[] hGrads; // hidden gradients for back-propagation | |
// back-prop momentum specific arrays (could be local to method Train) | |
private double[][] ihPrevWeightsDelta; // for momentum with back-propagation | |
private double[] hPrevBiasesDelta; | |
private double[][] hoPrevWeightsDelta; | |
private double[] oPrevBiasesDelta; | |
// Builds a fully-connected feed-forward network with the given layer sizes
// and allocates every weight, bias, and back-prop scratch array (all values
// start at 0.0 — call InitializeWeights or SetWeights before use).
// NOTE(review): rnd is a STATIC field, so constructing any NeuralNetwork
// resets the shared Random to seed 0. Fine for this one-network demo, but
// surprising if multiple instances are ever created.
public NeuralNetwork(int numInput, int numHidden, int numOutput)
{
    rnd = new Random(0); // for InitializeWeights() and Shuffle()
    this.numInput = numInput;
    this.numHidden = numHidden;
    this.numOutput = numOutput;
    this.inputs = new double[numInput];
    this.ihWeights = MakeMatrix(numInput, numHidden);
    this.hBiases = new double[numHidden];
    this.hOutputs = new double[numHidden];
    this.hoWeights = MakeMatrix(numHidden, numOutput);
    this.oBiases = new double[numOutput];
    this.outputs = new double[numOutput];
    // back-prop related arrays below
    this.hGrads = new double[numHidden];
    this.oGrads = new double[numOutput];
    // previous-delta arrays support the momentum term in UpdateWeights
    this.ihPrevWeightsDelta = MakeMatrix(numInput, numHidden);
    this.hPrevBiasesDelta = new double[numHidden];
    this.hoPrevWeightsDelta = MakeMatrix(numHidden, numOutput);
    this.oPrevBiasesDelta = new double[numOutput];
} // ctor
// Allocates a rows x cols jagged matrix with every cell initialized to 0.0.
private static double[][] MakeMatrix(int rows, int cols) // helper for ctor
{
    double[][] matrix = new double[rows][];
    for (int row = 0; row < rows; ++row)
        matrix[row] = new double[cols];
    return matrix;
}
// Diagnostic dump of the entire network state: layer sizes, inputs,
// weights, biases, hidden outputs, gradients, momentum deltas, and outputs.
// Rewritten to use StringBuilder — the original built the string with
// repeated '+=' concatenation, which is O(n^2) in the dump size.
public override string ToString()
{
    StringBuilder sb = new StringBuilder();
    sb.Append("===============================\n");
    sb.Append("numInput = " + numInput + " numHidden = " + numHidden + " numOutput = " + numOutput + "\n\n");
    AppendVector(sb, "inputs", inputs, "F2");
    AppendMatrix(sb, "ihWeights", ihWeights);
    AppendVector(sb, "hBiases", hBiases, "F4");
    AppendVector(sb, "hOutputs", hOutputs, "F4");
    AppendMatrix(sb, "hoWeights", hoWeights);
    AppendVector(sb, "oBiases", oBiases, "F4");
    AppendVector(sb, "hGrads", hGrads, "F4");
    AppendVector(sb, "oGrads", oGrads, "F4");
    AppendMatrix(sb, "ihPrevWeightsDelta", ihPrevWeightsDelta);
    AppendVector(sb, "hPrevBiasesDelta", hPrevBiasesDelta, "F4");
    AppendMatrix(sb, "hoPrevWeightsDelta", hoPrevWeightsDelta);
    AppendVector(sb, "oPrevBiasesDelta", oPrevBiasesDelta, "F4");
    AppendVector(sb, "outputs", outputs, "F2");
    sb.Append("===============================\n");
    return sb.ToString();
}

// Appends "name: \n", the formatted values separated by spaces, then a blank line.
private static void AppendVector(StringBuilder sb, string name, double[] vector, string fmt)
{
    sb.Append(name + ": \n");
    for (int i = 0; i < vector.Length; ++i)
        sb.Append(vector[i].ToString(fmt) + " ");
    sb.Append("\n\n");
}

// Appends "name: \n", one line per matrix row (F4 values), then a blank line.
private static void AppendMatrix(StringBuilder sb, string name, double[][] matrix)
{
    sb.Append(name + ": \n");
    for (int i = 0; i < matrix.Length; ++i)
    {
        for (int j = 0; j < matrix[i].Length; ++j)
            sb.Append(matrix[i][j].ToString("F4") + " ");
        sb.Append("\n");
    }
    sb.Append("\n");
}
// ---------------------------------------------------------------------------------------- | |
// Loads all weights and biases from one flat array laid out as:
// input-hidden weights, hidden biases, hidden-output weights, output biases
// (the same order produced by GetWeights).
// Throws ArgumentException when weights has the wrong length.
public void SetWeights(double[] weights)
{
    int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
    if (weights.Length != numWeights)
        // Fixed: was a bare 'new Exception("Bad weights array length: ")'
        // with a dangling colon and no diagnostic detail. ArgumentException
        // derives from Exception, so existing catch blocks still work.
        throw new ArgumentException("Bad weights array length: got " +
            weights.Length + ", expected " + numWeights);
    int k = 0; // points into weights param
    for (int i = 0; i < numInput; ++i)
        for (int j = 0; j < numHidden; ++j)
            ihWeights[i][j] = weights[k++];
    for (int i = 0; i < numHidden; ++i)
        hBiases[i] = weights[k++];
    for (int i = 0; i < numHidden; ++i)
        for (int j = 0; j < numOutput; ++j)
            hoWeights[i][j] = weights[k++];
    for (int i = 0; i < numOutput; ++i)
        oBiases[i] = weights[k++];
}
// Seeds every weight and bias with a uniform random value in [-0.01, 0.01)
// drawn from the class-level Random, then installs them via SetWeights.
public void InitializeWeights()
{
    const double lo = -0.01;
    const double hi = 0.01;
    int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
    double[] initialWeights = new double[numWeights];
    for (int k = 0; k < initialWeights.Length; ++k)
        initialWeights[k] = (hi - lo) * rnd.NextDouble() + lo;
    this.SetWeights(initialWeights);
}
// Returns the current weights and biases (presumably after training) as one
// flat array, in the same order expected by SetWeights: input-hidden weights,
// hidden biases, hidden-output weights, output biases.
public double[] GetWeights()
{
    int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
    double[] result = new double[numWeights];
    int k = 0;
    foreach (double[] row in ihWeights)
        foreach (double w in row)
            result[k++] = w;
    foreach (double b in hBiases)
        result[k++] = b;
    foreach (double[] row in hoWeights)
        foreach (double w in row)
            result[k++] = w;
    foreach (double b in oBiases)
        result[k++] = b;
    return result;
}
// ---------------------------------------------------------------------------------------- | |
// Feed-forward pass: latches xValues into the input layer, runs
// weighted-sum + bias through tanh at the hidden layer and softmax at the
// output layer, stores all intermediate values in the instance arrays,
// and returns a fresh copy of the output vector.
private double[] ComputeOutputs(double[] xValues)
{
    if (xValues.Length != numInput)
        throw new Exception("Bad xValues array length");

    double[] hSums = new double[numHidden]; // hidden-layer pre-activation sums
    double[] oSums = new double[numOutput]; // output-layer pre-activation sums

    Array.Copy(xValues, this.inputs, xValues.Length); // latch the inputs

    // hidden layer: sum(weight * input) + bias, through tanh
    for (int h = 0; h < numHidden; ++h)
    {
        for (int i = 0; i < numInput; ++i)
            hSums[h] += this.inputs[i] * this.ihWeights[i][h];
        hSums[h] += this.hBiases[h];
        this.hOutputs[h] = HyperTanFunction(hSums[h]); // hard-coded activation
    }

    // output layer: sum(weight * hidden output) + bias
    for (int o = 0; o < numOutput; ++o)
    {
        for (int h = 0; h < numHidden; ++h)
            oSums[o] += hOutputs[h] * hoWeights[h][o];
        oSums[o] += oBiases[o];
    }

    // softmax handles the whole output layer at once for efficiency
    double[] softOut = Softmax(oSums);
    Array.Copy(softOut, outputs, softOut.Length);

    double[] retResult = new double[numOutput]; // could define a GetOutputs method instead
    Array.Copy(this.outputs, retResult, retResult.Length);
    return retResult;
} // ComputeOutputs
// tanh with saturation clamping: for |x| > 20 the result is returned as
// exactly +/-1.0 (the approximation is correct to roughly 30 decimals,
// i.e. exact in double precision).
private static double HyperTanFunction(double x)
{
    if (x > 20.0)
        return 1.0;
    if (x < -20.0)
        return -1.0;
    return Math.Tanh(x);
}
// Softmax over the whole output-sum vector at once, using the standard
// max-subtraction trick so Math.Exp never overflows; the returned values
// are positive and sum to 1.0.
private static double[] Softmax(double[] oSums)
{
    double max = oSums[0];
    foreach (double v in oSums)
        if (v > max) max = v;

    // exponentiate the shifted sums and accumulate the normalizer
    double[] result = new double[oSums.Length];
    double scale = 0.0;
    for (int i = 0; i < oSums.Length; ++i)
    {
        result[i] = Math.Exp(oSums[i] - max);
        scale += result[i];
    }
    for (int i = 0; i < oSums.Length; ++i)
        result[i] /= scale;
    return result; // scaled so the entries sum to 1.0
}
// ---------------------------------------------------------------------------------------- | |
// One incremental back-propagation step: updates every weight and bias using
// the target values tValues, learning rate (eta), momentum (alpha), and
// weight decay. Assumes ComputeOutputs has just been called so inputs,
// hOutputs, and outputs hold the current forward-pass values.
// The update order matters: output gradients first, then hidden gradients
// (which read oGrads), then the weight/bias updates; within each update the
// sequence is gradient step -> momentum term -> weight decay -> save delta.
private void UpdateWeights(double[] tValues, double learnRate, double momentum, double weightDecay)
{
    if (tValues.Length != numOutput)
        throw new Exception("target values not same Length as output in UpdateWeights");
    // 1. compute output gradients
    for (int i = 0; i < oGrads.Length; ++i)
    {
        // derivative of softmax = (1 - y) * y (same as log-sigmoid)
        double derivative = (1 - outputs[i]) * outputs[i];
        // 'mean squared error version' includes (1-y)(y) derivative
        oGrads[i] = derivative * (tValues[i] - outputs[i]);
    }
    // 2. compute hidden gradients (must follow step 1: reads oGrads)
    for (int i = 0; i < hGrads.Length; ++i)
    {
        // derivative of tanh = (1 - y) * (1 + y)
        double derivative = (1 - hOutputs[i]) * (1 + hOutputs[i]);
        double sum = 0.0;
        for (int j = 0; j < numOutput; ++j) // each hidden delta is the sum of numOutput terms
        {
            double x = oGrads[j] * hoWeights[i][j];
            sum += x;
        }
        hGrads[i] = derivative * sum;
    }
    // 3a. update input-hidden weights (gradients must be computed right-to-left
    // but the weights themselves can be updated in any order)
    for (int i = 0; i < ihWeights.Length; ++i)
    {
        for (int j = 0; j < ihWeights[0].Length; ++j)
        {
            double delta = learnRate * hGrads[j] * inputs[i]; // compute the new delta
            ihWeights[i][j] += delta; // update. note we use '+' instead of '-'. this can be very tricky.
            // now add momentum using previous delta. on first pass old value will be 0.0 but that's OK.
            ihWeights[i][j] += momentum * ihPrevWeightsDelta[i][j];
            ihWeights[i][j] -= (weightDecay * ihWeights[i][j]); // weight decay
            ihPrevWeightsDelta[i][j] = delta; // save the plain gradient delta (pre-momentum/decay) for next pass
        }
    }
    // 3b. update hidden biases
    for (int i = 0; i < hBiases.Length; ++i)
    {
        double delta = learnRate * hGrads[i] * 1.0; // 1.0 is the constant input for a bias; could leave out
        hBiases[i] += delta;
        hBiases[i] += momentum * hPrevBiasesDelta[i]; // momentum
        hBiases[i] -= (weightDecay * hBiases[i]); // weight decay
        hPrevBiasesDelta[i] = delta; // save the delta for momentum
    }
    // 4. update hidden-output weights
    for (int i = 0; i < hoWeights.Length; ++i)
    {
        for (int j = 0; j < hoWeights[0].Length; ++j)
        {
            // hOutputs play the role of inputs to the output layer
            double delta = learnRate * oGrads[j] * hOutputs[i];
            hoWeights[i][j] += delta;
            hoWeights[i][j] += momentum * hoPrevWeightsDelta[i][j]; // momentum
            hoWeights[i][j] -= (weightDecay * hoWeights[i][j]); // weight decay
            hoPrevWeightsDelta[i][j] = delta; // save
        }
    }
    // 4b. update output biases
    for (int i = 0; i < oBiases.Length; ++i)
    {
        double delta = learnRate * oGrads[i] * 1.0;
        oBiases[i] += delta;
        oBiases[i] += momentum * oPrevBiasesDelta[i]; // momentum
        oBiases[i] -= (weightDecay * oBiases[i]); // weight decay
        oPrevBiasesDelta[i] = delta; // save
    }
} // UpdateWeights
// ---------------------------------------------------------------------------------------- | |
// Trains the network with incremental (online) back-propagation: each epoch
// visits every training row once in a fresh random order, running a forward
// pass followed by a weight update. Stops after maxEprochs epochs or when the
// training-set MSE drops below the hard-coded 0.020 threshold. Weight decay
// shrinks weight magnitudes over time unless the gradient keeps growing them.
// (Parameter name 'maxEprochs' (sic) is kept for caller compatibility.)
public void Train(double[][] trainData, int maxEprochs, double learnRate, double momentum,
  double weightDecay)
{
    double[] xValues = new double[numInput];  // input slice of the current row
    double[] tValues = new double[numOutput]; // target slice of the current row

    int[] sequence = new int[trainData.Length];
    for (int i = 0; i < sequence.Length; ++i)
        sequence[i] = i;

    for (int epoch = 0; epoch < maxEprochs; ++epoch)
    {
        double mse = MeanSquaredError(trainData);
        if (mse < 0.020) break; // hard-coded stop; consider passing the threshold in as a parameter

        Shuffle(sequence); // visit each training row in random order
        foreach (int idx in sequence)
        {
            Array.Copy(trainData[idx], xValues, numInput);
            Array.Copy(trainData[idx], numInput, tValues, 0, numOutput);
            ComputeOutputs(xValues);                                  // forward pass (stored internally)
            UpdateWeights(tValues, learnRate, momentum, weightDecay); // backward pass
        } // each training tuple
    }
} // Train
// In-place Fisher-Yates shuffle of sequence, driven by the class-level Random.
private static void Shuffle(int[] sequence)
{
    for (int i = 0; i < sequence.Length; ++i)
    {
        int swapIdx = rnd.Next(i, sequence.Length);
        int saved = sequence[swapIdx];
        sequence[swapIdx] = sequence[i];
        sequence[i] = saved;
    }
}
// Average sum-of-squared-errors per training row, computed with the current
// weights; Train uses this as its early-stopping condition.
private double MeanSquaredError(double[][] trainData)
{
    double sumSquaredError = 0.0;
    double[] xValues = new double[numInput];  // first numInput values of a row
    double[] tValues = new double[numOutput]; // last numOutput values of a row

    // each training row looks like (6.9 3.2 5.7 2.3) (0 0 1)
    foreach (double[] row in trainData)
    {
        Array.Copy(row, xValues, numInput);
        Array.Copy(row, numInput, tValues, 0, numOutput); // get target values
        double[] yValues = this.ComputeOutputs(xValues);  // outputs under current weights
        for (int j = 0; j < numOutput; ++j)
        {
            double err = tValues[j] - yValues[j];
            sumSquaredError += err * err;
        }
    }
    return sumSquaredError / trainData.Length;
}
// ---------------------------------------------------------------------------------------- | |
// Fraction of rows classified correctly under winner-takes-all: a row counts
// as correct when the largest computed output lines up with the 1 in its
// one-hot target vector. Returns correct / (correct + wrong).
public double Accuracy(double[][] testData)
{
    int numCorrect = 0;
    int numWrong = 0;
    double[] xValues = new double[numInput];  // input slice
    double[] tValues = new double[numOutput]; // target slice

    foreach (double[] row in testData)
    {
        Array.Copy(row, xValues, numInput); // split row into x-values and t-values
        Array.Copy(row, numInput, tValues, 0, numOutput);
        double[] yValues = this.ComputeOutputs(xValues);
        int maxIndex = MaxIndex(yValues); // which output node fired hardest?
        // exact == compare is safe here: targets are written as exactly 0.0 or 1.0
        if (tValues[maxIndex] == 1.0)
            ++numCorrect;
        else
            ++numWrong;
    }
    // note: divides by zero if testData is empty, matching original behavior
    return (numCorrect * 1.0) / (numCorrect + numWrong);
}
// Returns the index of the largest value in vector; on ties, the earliest
// index wins. Helper for Accuracy().
private static int MaxIndex(double[] vector)
{
    int bigIndex = 0;
    for (int i = 1; i < vector.Length; ++i)
    {
        if (vector[i] > vector[bigIndex])
            bigIndex = i;
    }
    return bigIndex;
}
} // NeuralNetwork | |
} // ns | |
| 2014 quaetrix analytics | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment