Proof of concept implementation of a simple CNN consisting of 1 convolution layer, 1 pooling layer, and a variable number of classic neural network layers.
#r "nuget: MathNet.Numerics" | |
#r "nuget: MathNet.Numerics.FSharp" | |
open System.Runtime.CompilerServices | |
open Checked | |
open MathNet.Numerics | |
open MathNet.Numerics.LinearAlgebra.Double | |
open MathNet.Numerics.LinearAlgebra | |
let inline (.*) (m1: LinearAlgebra.Matrix<'T>) (m2: LinearAlgebra.Matrix<'T>) = m1.PointwiseMultiply m2 | |
let inline isNan a =
    match a |> box with
    | :? System.Double as a -> System.Double.IsNaN a
    | _ -> false

//let inline (/) a b =
//    if isNan a || isNan b then raise <| System.NotFiniteNumberException()
//    elif a = LanguagePrimitives.GenericZero<_> && b = LanguagePrimitives.GenericZero<_> then
//        raise <| System.NotFiniteNumberException("a and b are nearly zero which would result in NaN!")
//    else Operators.(/) a b
[<AutoOpen>]
module StringExtensions =
    open System.Text.RegularExpressions

    // Verbatim string so \| reaches the regex engine as an escaped pipe.
    let private marginRegex =
        Regex(@"^.*?\|", RegexOptions.Multiline ||| RegexOptions.Compiled)

    type System.String with
        member self.StripMargin =
            //printfn "%s" self
            let res = marginRegex.Replace(self, "")
            //printfn "%s" res
            res
type Node = Node of weights: float [,]
type Layer = Layer of nodes: Node [,]
type Network2D = { layers: Layer [] }

type LayerResult =
    { activation: float [,]
      intervalues: float [,] list }

type PositionNetwork = { layer: int; node: int * int }

type CNN =
    { filterMatrices: float [,] []
      network: Network2D }
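// Reading of the model above: a Node stores one weight matrix shaped like the previous
// layer's activation; a Layer is a 2D grid of such nodes, so a layer's activation is
// again a float [,]; Network2D chains those layers, and CNN puts a bank of filter
// matrices in front of the classic network.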
module Core =
    let max = max
module Array =
    let inline private checkNonNull argName arg =
        match box arg with
        | null -> nullArg argName
        | _ -> ()

    let foldi<'T, 'State> (folder: 'State -> int -> 'T -> 'State) (state: 'State) (array: 'T []) =
        checkNonNull "array" array
        let f = OptimizedClosures.FSharpFunc<_, _, _, _>.Adapt(folder)
        let mutable state = state

        for i = 0 to array.Length - 1 do
            state <- f.Invoke(state, i, array.[i])

        state
module Array2D =
    let inline private checkNonNull argName arg =
        match box arg with
        | null -> nullArg argName
        | _ -> ()

    let getDimsForArray2D arr =
        {| rowCount = arr |> Array2D.length1
           columnCount = arr |> Array2D.length2 |}

    let foldi<'T, 'State> (folder: 'State -> int -> int -> 'T -> 'State) (state: 'State) (array: 'T [,]) =
        checkNonNull "array" array
        let f = OptimizedClosures.FSharpFunc<_, _, _, _, _>.Adapt(folder)
        let mutable state = state
        let length1 = Array2D.length1 array
        let length2 = Array2D.length2 array

        for i = 0 to length1 - 1 do
            for j = 0 to length2 - 1 do
                state <- f.Invoke(state, i, j, array.[i, j])

        state

    let fold<'T, 'State> (folder: 'State -> 'T -> 'State) (state: 'State) (array: 'T [,]) =
        checkNonNull "array" array
        let f = OptimizedClosures.FSharpFunc<_, _, _>.Adapt(folder)
        let mutable state = state
        let length1 = Array2D.length1 array
        let length2 = Array2D.length2 array

        for i = 0 to length1 - 1 do
            for j = 0 to length2 - 1 do
                state <- f.Invoke(state, array.[i, j])

        state

    let inline averageBy (fn: 'a -> 'b) (arr: 'a [,]) =
        let dims = getDimsForArray2D arr

        let acc =
            arr
            |> fold (fun acc element -> Checked.op_Addition acc <| fn element) LanguagePrimitives.GenericZero<'b>

        LanguagePrimitives.DivideByInt acc (dims.rowCount * dims.columnCount)

    let inline max arr = arr |> fold max arr.[0, 0]

    let inline maxWithWinnerIndex arr =
        let (maxElement, maxElementIndex) =
            arr
            |> foldi
                (fun (maxSoFar, maxSoFarIndex) i j element ->
                    let newMax = Core.max element maxSoFar

                    if newMax <> maxSoFar then
                        (newMax, (i, j))
                    else
                        (maxSoFar, maxSoFarIndex))
                (arr.[0, 0], (0, 0))

        {| maxElement = maxElement
           maxElementIndex = maxElementIndex |}

    let inline zip arr1 arr2 =
        arr1
        |> Array2D.mapi (fun i j e1 -> (e1, Array2D.get arr2 i j))

    let inline sumBy (fn: 'a -> 'b) (arr: 'a [,]) =
        arr
        |> fold (fun state e -> Checked.op_Addition state <| fn e) LanguagePrimitives.GenericZero<'b>

    let inline toMatrix (arr: 'a [,]) =
        let dims = getDimsForArray2D arr

        let m1 =
            Matrix<'a>.Build.Dense(dims.rowCount, dims.columnCount)

        arr
        |> Array2D.iteri (fun i j value -> m1.[i, j] <- value)

        m1
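// Usage sketch (added, not part of the original gist): maxWithWinnerIndex returns the
// maximum together with its index, which the pooling layer below needs in order to
// route deltas back to the winning pixel.
do
    let winner =
        Array2D.maxWithWinnerIndex (array2D [ [ 1.0; 3.0 ]; [ 2.0; 0.0 ] ])

    if winner.maxElement <> 3.0 || winner.maxElementIndex <> (0, 1) then
        failwith "maxWithWinnerIndex sanity check failed"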
let getValues2D (inputX: float [,]) (network: Network2D) sigmaFun =
    network.layers
    |> Array.fold
        (fun (state: LayerResult) (Layer layer) ->
            let activation = state.activation
            let ls = state.intervalues

            let res: float [,] =
                layer
                |> Array2D.map
                    (fun (Node weights) ->
                        Array2D.zip activation weights
                        |> Array2D.sumBy (fun (a, b) -> a * b)
                        |> sigmaFun)

            { LayerResult.activation = res
              LayerResult.intervalues = List.append ls [ res ] })
        { LayerResult.activation = inputX
          LayerResult.intervalues = [] }
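// A hedged forward-pass sketch (added, not part of the original gist): a single layer
// with one node whose 2x2 weight matrix matches the 2x2 input, so its output is
// sigma(sum_ij w_ij * x_ij) = sigma 2.0.
do
    let sigma z = 1.0 / (1.0 + exp (-z))

    let net =
        { layers = [| Layer(Array2D.create 1 1 (Node(Array2D.create 2 2 0.5))) |] }

    let result = getValues2D (Array2D.create 2 2 1.0) net sigma

    if abs (result.activation.[0, 0] - sigma 2.0) > 1e-12 then
        failwith "getValues2D sanity check failed"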
(*
    E = L(t, y)
    net_j = sum_i (w_ij * o_i)
    o_j = phi(net_j) = phi(sum_i (w_ij * o_i))

    dE/dw_ij = dE/do_j * do_j/dw_ij
             = dE/do_j * do_j/dnet_j * dnet_j/dw_ij

    dnet_j/do_i  = d/do_i  sum_i (w_ij * o_i) = w_ij
    dnet_j/dw_ij = d/dw_ij sum_i (w_ij * o_i) = o_i

    // If j is an output neuron, take half of the quadratic error as the loss function
    // and assume o_j = y, as this is the output:
    dE/do_j = dE/dy = d/dy (1/2 * (t - y)^2) = (y - t)

    // If j is in an inner layer:
    dE/do_j = sum_l (dE/do_l * do_l/do_j)
            = sum_l (dE/do_l * do_l/dnet_l * dnet_l/do_j)
            = sum_l (dE/do_l * do_l/dnet_l * w_jl)

    dE/dw_ij = dE/do_j * do_j/dnet_j * o_i

    // Generally this is what delta_j is:
    delta_j = dE/do_j * do_j/dnet_j
    dE/dw_ij = delta_j * o_i

    // If j is an output neuron:
    delta_j = (y - t) * dphi(net_j)/dnet_j

    // Small nit: more generally, for an output neuron with loss L,
    delta_j = dL(o_j, y)/do_j * dphi(net_j)/dnet_j

    // If j is an inner neuron:
    delta_j = sum_l (delta_l * w_jl) * dphi(net_j)/dnet_j

    logistic_phi(z) = (1 + e^(-z))^(-1)

    d logistic_phi / dz = -1 * (1 + e^(-z))^(-2) * e^(-z) * (-1)
                        = (1 + e^(-z))^(-2) * e^(-z)
                        = (1 + e^(-z))^(-1) * (1 - (1 + e^(-z))^(-1))
                        = logistic_phi(z) * (1 - logistic_phi(z))

    // If j is an inner neuron with the logistic function as phi:
    delta_j = sum_l (delta_l * w_jl) * logistic_phi(net_j) * (1 - logistic_phi(net_j))
*)
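// A quick numeric cross-check of the derivation above (an added sketch, not part of the
// original gist): the closed form phi(z) * (1 - phi(z)) should match a central finite
// difference of the logistic function.
do
    let phi z = 1.0 / (1.0 + exp (-z))
    let z = 0.3
    let h = 1e-6
    let numeric = (phi (z + h) - phi (z - h)) / (2.0 * h)
    let closedForm = phi z * (1.0 - phi z)

    if abs (numeric - closedForm) > 1e-8 then
        failwith "logistic derivative identity check failed"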
let calcDeltas2D (inputX: float [,]) network sigmaFun (expected: float [,]) : Map<PositionNetwork, float> * LayerResult =
    // Derivative of the logistic function expressed through its output: phi'(net_j) = o_j * (1 - o_j).
    let phiDerivative o_j = o_j * (1.0 - o_j)
    // let e = System.Math.E
    // (1.0 + e**(-z))**(-1.0) * (1.0 - (1.0 + e**(-z))**(-1.0))
    let layerResult = getValues2D inputX network sigmaFun
    let lastLayerIndex = network.layers.Length - 1

    // Deltas of the output layer: (o_j - t) * phi'(o_j).
    let simpleDeltas =
        layerResult.intervalues.[lastLayerIndex]
        |> Array2D.foldi
            (fun ls i k o_j ->
                let j = (i, k)

                let res =
                    { layer = lastLayerIndex; node = j }, (o_j - expected.[i, k]) * phiDerivative o_j

                res :: ls)
            []
        |> Map

    // Walk backwards through the inner layers, pulling each delta from the layer above.
    let rec calc layer deltas =
        if layer >= 0 then
            let deltas =
                layerResult.intervalues.[layer]
                |> Array2D.foldi
                    (fun (deltas: Map<PositionNetwork, float>) j1 j2 o_j ->
                        let deltaSum =
                            let upperLayer = layer + 1

                            layerResult.intervalues.[upperLayer]
                            |> Array2D.foldi
                                (fun acc i k _ ->
                                    let delta_l =
                                        deltas.[{ layer = upperLayer; node = (i, k) }]

                                    let w_jl =
                                        let (Layer nodes) = network.layers.[upperLayer]
                                        let (Node weights) = nodes.[i, k]
                                        weights.[j1, j2]

                                    acc + w_jl * delta_l)
                                0.0

                        let delta_j = deltaSum * phiDerivative o_j

                        deltas
                        |> Map.add { layer = layer; node = (j1, j2) } delta_j)
                    deltas

            calc (layer - 1) deltas
        else
            deltas

    calc (lastLayerIndex - 1) simpleDeltas, layerResult
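// A hedged sanity sketch (added, not part of the original gist): for a single-layer
// network the only delta is the output delta (o - t) * phi'(o) with phi'(o) = o * (1 - o).
do
    let sigma z = 1.0 / (1.0 + exp (-z))

    let net =
        { layers = [| Layer(Array2D.create 1 1 (Node(Array2D.create 1 1 0.5))) |] }

    let deltas, layerResult =
        calcDeltas2D (Array2D.create 1 1 1.0) net sigma (Array2D.create 1 1 0.0)

    let o = layerResult.activation.[0, 0]
    let expectedDelta = (o - 0.0) * o * (1.0 - o)

    if abs (deltas.[{ layer = 0; node = (0, 0) }] - expectedDelta) > 1e-12 then
        failwith "calcDeltas2D sanity check failed"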
let discreteConvolution (section1: _ [,]) (filterMatrix: _ [,]) =
    let n = Array2D.length1 section1
    let m = Array2D.length2 section1
    // Note: this mapping is only consistent for square sections (n = m), which always
    // holds here because a section has the same shape as the (square) filter matrix.
    let getIndexTuple index = index / n, index % n
    let maxIndex = n * m - 1
    let bias = 1.0
    // We interpret the input as a discrete finite function, with maxIndex being the last
    // possible index of the input function (in the mathematical sense). Therefore we map
    // the 2D indices to 1D indices by going from the top left to the bottom right;
    // pairing index i with maxIndex - i flips the kernel, which makes this a proper
    // convolution rather than a cross-correlation.
    seq { 0 .. maxIndex }
    |> Seq.fold
        (fun acc i ->
            let (i1, i2) = getIndexTuple i
            let (k1, k2) = getIndexTuple (maxIndex - i)

            float section1.[i1, i2] * float filterMatrix.[k1, k2] + acc)
        bias
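// A small sanity check (added sketch, not part of the original gist): because the
// kernel is applied flipped, the sum for these 2x2 inputs is 1*8 + 2*7 + 3*6 + 4*5 = 60,
// plus the hard-coded bias of 1.0.
do
    let section = array2D [ [ 1.; 2. ]; [ 3.; 4. ] ]
    let filter = array2D [ [ 5.; 6. ]; [ 7.; 8. ] ]

    if abs (discreteConvolution section filter - 61.0) > 1e-12 then
        failwith "discreteConvolution sanity check failed"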
let convolutionStep input filter stride : float [,] =
    let inputLength1 = input |> Array2D.length1
    let inputLength2 = input |> Array2D.length2
    let filterLength1 = filter |> Array2D.length1
    let filterLength2 = filter |> Array2D.length2
    // The stride is always 1 here so far. Besides, we do narrow (valid) convolution:
    // the filter never leaves the input, so each dimension shrinks by filterLength - 1.
    array2D [ for i in 0 .. stride .. (inputLength1 - filterLength1) ->
                  [ for j in 0 .. stride .. (inputLength2 - filterLength2) do
                        let currentSelection =
                            input.[i .. (i + filterLength1 - 1), j .. (j + filterLength2 - 1)]

                        discreteConvolution currentSelection filter ] ]
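// Usage sketch for convolutionStep (added, not part of the original gist): narrow
// convolution of a 4x4 input with a 3x3 filter at stride 1 yields a 2x2 output, and
// for these constant inputs every entry is 9 * (1.0 * 0.5) + bias = 5.5.
do
    let out =
        convolutionStep (Array2D.create 4 4 1.0) (Array2D.create 3 3 0.5) 1

    if Array2D.length1 out <> 2
       || Array2D.length2 out <> 2
       || abs (out.[0, 0] - 5.5) > 1e-12 then
        failwith "convolutionStep sanity check failed"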
let inline poolingStep (input: 'a [,]) (windowSize: int) stride =
    let length1, length2 =
        Array2D.length1 input, Array2D.length2 input
    //printfn "max index: %d" (length1 - 1)
    // The windows have to tile the input exactly: the last window must end on the last index.
    if (length1 - 1) <> (((length1 - windowSize) / stride) * stride + (windowSize - 1)) then
        failwithf "Incompatible values for windowSize and stride in poolingStep."

    if (length2 - 1) <> (((length2 - windowSize) / stride) * stride + (windowSize - 1)) then
        failwithf "Incompatible values for windowSize and stride in poolingStep."

    let res =
        array2D [ for i in 0 .. stride .. (length1 - windowSize) ->
                      [ for j in 0 .. stride .. (length2 - windowSize) do
                            let currentSelection =
                                input.[i .. (i + windowSize - 1), j .. (j + windowSize - 1)]
                            //printfn "j: %d" (j + windowSize - 1)
                            let res = Array2D.maxWithWinnerIndex currentSelection

                            // Translate the window-local winner index back into input coordinates.
                            let adjustedIndex =
                                let (x, y) = res.maxElementIndex
                                (i + x, j + y)

                            {| res with maxElementIndex = adjustedIndex |} ] ]

    res
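// Hedged usage sketch (added, not part of the original gist): 2x2 max pooling with
// stride 2 on a 4x4 input keeps the maximum of each window together with its index in
// the input, which the CNN backward pass later uses to route deltas to the winners.
do
    let input =
        array2D [ [ 1.; 2.; 3.; 4. ]
                  [ 5.; 6.; 7.; 8. ]
                  [ 9.; 10.; 11.; 12. ]
                  [ 13.; 14.; 15.; 16. ] ]

    let pooled = poolingStep input 2 2

    // The winner of the top-left window is 6.0 at input index (1, 1).
    if pooled.[0, 0].maxElement <> 6.0
       || pooled.[0, 0].maxElementIndex <> (1, 1) then
        failwith "poolingStep sanity check failed"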
let preCalcCNN (input: float [,]) (cnn: CNN) sigmaFun =
    // Convolution comes first
    let stride = 1
    //printfn $"input row count: {input |> Array2D.length1}, column count: {input |> Array2D.length2}"
    let convolutionLayers =
        cnn.filterMatrices
        |> Array.map (fun filterMatrix -> convolutionStep input filterMatrix stride)

    // Note: despite the name, this applies sigmaFun (the logistic function) to every
    // pixel, not an actual ReLU.
    let relus =
        convolutionLayers
        |> Array.map (Array2D.map (fun pixel -> sigmaFun pixel))

    // Now apply pooling
    let windowSize = 2
    let windowStride = 2

    let poolingStepResults =
        relus
        |> Array.map (fun relu -> poolingStep relu windowSize windowStride)

    let filterMatrixWidth =
        cnn.filterMatrices
        |> Array.head
        |> Array2D.length2

    let poolingStepData, poolingStepResultFixed =
        let n =
            poolingStepResults
            |> Array.head
            |> Array2D.length1

        let columnCount =
            poolingStepResults |> Array.head |> Array2D.length2

        let m = columnCount * poolingStepResults.Length
        // All pooled maps are laid side by side:
        // FilterMatrixHeight x (FilterMatrixWidth * FilterMatricesAmount)
        // => n x m matrix
        Array2D.init
            n
            m
            (fun j i ->
                // Beware: in this closure the row index is named j and the column index i.
                let x = i % columnCount
                let y = j
                let overallIndex = i / columnCount
                poolingStepResults.[overallIndex].[y, x].maxElement)
        |> Array2D.toMatrix,
        Array2D.init
            n
            m
            (fun i j ->
                let x = j % columnCount
                let y = i
                let overallIndex = j / columnCount
                //printfn $"i: {i}, j: {j}"
                {| poolingStepResults.[overallIndex].[y, x] with
                       maxElementIndexInResult = (i, j) |})

    let maxElementIndices =
        poolingStepResults
        |> Array.map (Array2D.fold (fun state element -> element.maxElementIndex :: state) [])

    let dim1, dim2 =
        let (Layer layer) = cnn.network.layers.[0]
        let (Node node) = layer.[0, 0]
        node |> Array2D.length1, node |> Array2D.length2

    if poolingStepData.RowCount <> dim1
       || poolingStepData.ColumnCount <> dim2 then
        failwithf
            $"Incompatible dimensions of pooling step layer and classic network. Should be {poolingStepData.RowCount} and {poolingStepData.ColumnCount}."

    {| maxElementIndices = maxElementIndices
       poolingStepData = poolingStepData |> Matrix.toArray2
       filterMatrixWidth = filterMatrixWidth
       poolingStepResults = poolingStepResultFixed
       relusLayer = relus |}
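// Shape sketch for preCalcCNN (the numbers are assumed from the test at the bottom of
// this file, not fixed by the function itself): an 8x8 input convolved with a 3x3
// filter at stride 1 yields a 6x6 map; 2x2 max pooling at stride 2 shrinks it to 3x3;
// with 4 filters laid side by side the pooled matrix is 3 x 12, which is exactly the
// weight shape every node of the first classic layer must have.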
let calcCNN (input: float [,]) (cnn: CNN) sigmaFun =
    let preCalcData = preCalcCNN input cnn sigmaFun
    getValues2D preCalcData.poolingStepData cnn.network sigmaFun
let calcDeltasAndAdjustment (input: float [,]) network sigmaFun (expected: float [,]) =
    let deltasBeforePoolingLayer, layerResult =
        calcDeltas2D input network sigmaFun expected

    let getDimsForLayer layer =
        let (Layer layer) = network.layers.[layer]
        Array2D.getDimsForArray2D layer

    seq {
        let currentLayer = Array2D.getDimsForArray2D input
        let upperLayer = getDimsForLayer 0

        yield
            Array2D.init
                upperLayer.rowCount
                upperLayer.columnCount
                (fun i j ->
                    Array2D.init
                        currentLayer.rowCount
                        currentLayer.columnCount
                        (fun c1 c2 ->
                            let o_kl = input.[c1, c2]

                            {| adjustment =
                                   o_kl
                                   * deltasBeforePoolingLayer.[{ layer = 0; node = (i, j) }] |}))

        for layer, intervalues in layerResult.intervalues
                                  |> Seq.take (layerResult.intervalues.Length - 1)
                                  |> Seq.indexed do
            let currentLayer = getDimsForLayer layer
            let upperLayer = getDimsForLayer (layer + 1)

            yield
                Array2D.init
                    upperLayer.rowCount
                    upperLayer.columnCount
                    (fun i j ->
                        Array2D.init
                            currentLayer.rowCount
                            currentLayer.columnCount
                            (fun c1 c2 ->
                                let o_kl = intervalues.[c1, c2]

                                {| adjustment =
                                       o_kl
                                       * deltasBeforePoolingLayer.[{ layer = layer + 1; node = (i, j) }] |}))
    }
    |> Seq.toArray,
    layerResult
let gradientDescentNN network sigmaFun inputsWithExpectedOutput abortWhen =
    let rec fixit network i =
        let results =
            inputsWithExpectedOutput
            |> Array.map (fun (input, expected) -> calcDeltasAndAdjustment input network sigmaFun expected)

        let averageSquareError: float =
            // Despite the name, this computes the mean absolute error of the outputs.
            let calcAverageErrorForSample layerResult (expected: float [,]) : float =
                layerResult.activation
                // TODO: Replace with Array2D.indexed
                |> Array2D.mapi (fun i j activation -> (i, j, activation))
                |> Array2D.averageBy (fun (i, j, activation) -> abs (float activation - float expected.[i, j]))

            let acc =
                Array.zip results inputsWithExpectedOutput
                |> Array.sumBy (fun ((_, layerResult), (_, expected)) -> calcAverageErrorForSample layerResult expected)

            acc / (float results.Length)

        // i % 1 = 0 is always true, so this prints every iteration; raise the modulus to throttle.
        if i % 1 = 0 then
            printfn "square error at %20dth iteration: %f" i averageSquareError

        // We abort if we exceed the maximum proposed number of iterations, if the average
        // square error drops into the abortWhen range, or if it became non-finite.
        // (The "no change since the last iteration" criterion is commented out below.)
        if System.Double.IsNaN averageSquareError
           || System.Double.IsInfinity averageSquareError then
            printfn $"Early abort. We seriously screwed up here. Average square error is: {averageSquareError}"
            network
        elif i > 1_000_000_000
             || averageSquareError <= abortWhen
             //|| (abs (averageSquareError - lastSquareError) <= 0.000000000001)
             then
            network
        else
            let adjustments: {| adjustment: float |} [,] [,] [] =
                let n = inputsWithExpectedOutput.Length

                // Sum the per-sample adjustments elementwise over all samples ...
                let acc =
                    results
                    |> Array.map fst
                    |> Array.reduce
                        (fun adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2 ->
                            Array.zip adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2
                            |> Array.map
                                (fun (adjustmentsPerLayer1, adjustmentsPerLayer2) ->
                                    Array2D.zip adjustmentsPerLayer1 adjustmentsPerLayer2
                                    |> Array2D.map
                                        (fun (adjustmentsPerNode1, adjustmentsPerNode2) ->
                                            Array2D.zip adjustmentsPerNode1 adjustmentsPerNode2
                                            |> Array2D.map
                                                (fun (adjustment1, adjustment2) ->
                                                    //printfn "adjustment1: %A, adjustment2: %A" adjustment1 adjustment2
                                                    {| adjustment = adjustment1.adjustment + adjustment2.adjustment |}))))

                // ... and average them.
                acc
                |> Array.map (
                    Array2D.map (Array2D.map (fun adjustment -> {| adjustment = adjustment.adjustment / (float n) |}))
                )

            let layers =
                network.layers
                |> Array.mapi
                    (fun layerNumber (Layer layer) ->
                        //printfn "%d" layerNumber
                        layer
                        |> Array2D.mapi
                            (fun i j (Node node) ->
                                //printfn "i: %d, j: %d" i j
                                //printfn $"{adjustments.[layerNumber] |> Array2D.getDimsForArray2D}"
                                Array2D.zip node adjustments.[layerNumber].[i, j]
                                |> Array2D.map
                                    (fun (current, adjustment) ->
                                        //printfn "adjustment %f" adjustment.adjustment
                                        // Plain gradient descent step with a fixed learning rate of 10.0.
                                        current - 10.0 * adjustment.adjustment)
                                |> Node)
                        |> Layer)

            let newNetwork = { layers = layers }
            fixit newNetwork (i + 1)

    fixit network 0
let calcDeltasAndAdjustmentCNN (input: float [,]) (cnn: CNN) sigmaFun (expected: float [,]) =
    let preCalcData = preCalcCNN input cnn sigmaFun
    let poolingStepData = preCalcData.poolingStepData
    let maxElementIndices = preCalcData.maxElementIndices
    let filterMatrixWidth = preCalcData.filterMatrixWidth

    let deltasBeforePoolingLayer, layerResult =
        calcDeltas2D poolingStepData cnn.network sigmaFun expected

    let filterMatricesAdjustments =
        let dims =
            let (Layer layer) = cnn.network.layers.[0]
            Array2D.getDimsForArray2D layer

        let filterDims =
            let dims =
                Array2D.getDimsForArray2D cnn.filterMatrices.[0]

            {| dims with
                   columnCount = dims.columnCount * cnn.filterMatrices.Length |}

        //printfn "dim1: %d, dim2: %d" dim1 dim2
        let phiDerivative o_j = o_j * (1.0 - o_j)

        let calcAdjustment m' n' (data: _ seq) =
            let calcDelta k l value =
                let acc =
                    seq {
                        for i in 0 .. dims.rowCount - 1 do
                            for j in 0 .. dims.columnCount - 1 -> (i, j)
                    }
                    |> Seq.sumBy
                        (fun (i, j) ->
                            let weight =
                                let (Layer layer) = cnn.network.layers.[0]
                                let (Node weights) = layer.[i, j]
                                weights.[k, l]
                            //printfn "weight: %f" weight
                            let deltaUpperLayer =
                                deltasBeforePoolingLayer.[{ layer = 0; node = (i, j) }]
                            //printfn "%A" deltaUpperLayer
                            deltaUpperLayer * weight)

                let o = value
                acc * phiDerivative o

            //printfn "length: %d" <| (data |> Seq.length)
            if data |> Seq.isEmpty then
                0.
            else
                data
                |> Seq.sumBy
                    (fun ((i, j), (k, l), value) ->
                        let delta = calcDelta k l value
                        //printfn "%f" delta
                        let o = input.[i + m', j + n']
                        delta * o)

        cnn.filterMatrices
        |> Array.mapi
            (fun filterMatrixIndex filterMatrix ->
                // Only the pooling winners belonging to this filter's slice contribute.
                let data =
                    preCalcData.poolingStepResults.[*, (filterMatrixIndex * filterMatrixWidth) .. ((filterMatrixIndex + 1) * filterMatrixWidth - 1)]
                    |> Array2D.fold
                        (fun ls element ->
                            (element.maxElementIndex, element.maxElementIndexInResult, element.maxElement)
                            :: ls)
                        []

                filterMatrix
                |> Array2D.mapi (fun m' n' _ -> calcAdjustment m' n' data))

    seq {
        let filterHeight =
            cnn.filterMatrices.[0] |> Array2D.length1

        let filterWidth =
            cnn.filterMatrices.[0] |> Array2D.length2

        let inputHeight = input |> Array2D.length1
        let inputWidth = input |> Array2D.length2

        // Adjustments for the first layer (the convolutional layer)
        yield
            Array2D.init
                filterHeight
                (filterWidth * cnn.filterMatrices.Length)
                (fun m n ->
                    let n' = n % filterWidth
                    let m' = m
                    let currentFilterIndex = n / filterWidth

                    {| adjustment = filterMatricesAdjustments.[currentFilterIndex].[m', n'] |}
                    |> Array2D.create 1 1)

        // Adjustments for the other layers (classic neural network layers) except the
        // last layer; that one needs special handling.
        let getDimsForLayer layer =
            let (Layer layer) = cnn.network.layers.[layer]
            Array2D.getDimsForArray2D layer

        // The pooling step layer comes before the first classic layer.
        let currentLayer =
            Array2D.getDimsForArray2D poolingStepData

        let upperLayer = getDimsForLayer 0

        yield
            Array2D.init
                upperLayer.rowCount
                upperLayer.columnCount
                (fun i j ->
                    Array2D.init
                        currentLayer.rowCount
                        currentLayer.columnCount
                        (fun c1 c2 ->
                            let o_kl = poolingStepData.[c1, c2]

                            {| adjustment =
                                   o_kl
                                   * deltasBeforePoolingLayer.[{ layer = 0; node = (i, j) }] |}))

        for layer, intervalues in layerResult.intervalues
                                  |> Seq.take (layerResult.intervalues.Length - 1)
                                  |> Seq.indexed do
            let currentLayer = getDimsForLayer layer
            let upperLayer = getDimsForLayer (layer + 1)

            yield
                Array2D.init
                    upperLayer.rowCount
                    upperLayer.columnCount
                    (fun i j ->
                        Array2D.init
                            currentLayer.rowCount
                            currentLayer.columnCount
                            (fun c1 c2 ->
                                let o_kl = intervalues.[c1, c2]

                                {| adjustment =
                                       o_kl
                                       * deltasBeforePoolingLayer.[{ layer = layer + 1; node = (i, j) }] |}))
    }
    |> Seq.toArray,
    layerResult
let sigmaFun value = 1. / (1. + System.Math.E ** (-value))

// NaN (e.g. the square root of a negative accumulator) and infinities both count as
// non-finite so the AdaGrad-style scaling below can fall back to a neutral denominator.
let (|Finite|NonFinite|) a =
    if System.Double.IsNaN a || System.Double.IsInfinity a then
        NonFinite
    else
        Finite a

// It seems that if we call random again too soon, it gets reseeded with the same
// millisecond and returns the same number again. This is highly suboptimal for our
// use case, soooo we wait.
let random () =
    let random =
        new System.Random(System.DateTime.Now.Millisecond)

    let res = random.NextDouble() * 2.0 - 1.0
    System.Threading.Thread.Sleep(1)
    res

// Why this is unsafe, see above :D
let unsafeRandom () =
    System
        .Random(System.DateTime.Now.Millisecond)
        .NextDouble()
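// A sketch of an alternative that avoids the Sleep (an assumption, not part of the
// original gist): reuse a single Random instance so consecutive calls draw fresh
// numbers from one stream instead of reseeding from the clock. The name randomShared
// is hypothetical; the code below keeps using random above.
let randomShared =
    let shared = System.Random()
    fun () -> shared.NextDouble() * 2.0 - 1.0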
let gradientDescentCNN2 cnn sigmaFun inputsWithExpectedOutput abortWhen =
    inputsWithExpectedOutput
    |> Array.iter
        (fun (input, expected) ->
            let _, initialLayerResult =
                calcDeltasAndAdjustmentCNN input cnn sigmaFun expected

            printfn $"initial activation: {initialLayerResult.activation.[0, 0]}")

    let epsilon = 1e-4

    let rec fixit cnn (sumOfGradients: System.Collections.Generic.Dictionary<_, _>) i =
        let network = cnn.network

        let results =
            inputsWithExpectedOutput
            |> Array.map (fun (input, expected) -> calcDeltasAndAdjustmentCNN input cnn sigmaFun expected)

        let averageSquareError: float =
            let calcAverageErrorForSample layerResult (expected: float [,]) : float =
                layerResult.activation
                // TODO: Replace with Array2D.indexed
                |> Array2D.mapi (fun i j activation -> (i, j, activation))
                |> Array2D.averageBy (fun (i, j, activation) ->
                    //if System.Double.IsNaN activation then
                    //    printfn "activation is NaN!"
                    //if System.Double.IsNaN expected.[i, j] then
                    //    printfn "expected is NaN!"
                    0.5 * (float activation - float expected.[i, j]) ** 2.0)

            let acc =
                Array.zip results inputsWithExpectedOutput
                |> Array.sumBy (fun ((_, layerResult), (_, expected)) -> calcAverageErrorForSample layerResult expected)

            acc / (float results.Length)

        if i % 10 = 0 then
            //System.Console.Clear()
            //System.Console.SetCursorPosition(0, 0)
            $"square error at %20d{i}th iteration: %.40f{averageSquareError}
            |"
                .StripMargin
            |> System.Console.Write

        // We abort if we exceed the maximum proposed number of iterations, if the
        // average square error drops into the abortWhen range, or if it became non-finite.
        if System.Double.IsNaN averageSquareError
           || System.Double.IsInfinity averageSquareError then
            printfn $"Early abort. We seriously screwed up here. Average square error is: {averageSquareError}"
            cnn
        elif i > 1_000_000_000 || averageSquareError <= abortWhen then
            cnn
        else
            let r = random ()

            let adjustments: {| adjustment: float |} [,] [,] [] =
                let n = inputsWithExpectedOutput.Length

                let acc =
                    results
                    |> Array.Parallel.map fst
                    |> Array.reduce
                        (fun adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2 ->
                            Array.zip adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2
                            |> Array.Parallel.map
                                (fun (adjustmentsPerLayer1, adjustmentsPerLayer2) ->
                                    Array2D.zip adjustmentsPerLayer1 adjustmentsPerLayer2
                                    |> Array2D.map
                                        (fun (adjustmentsPerNode1, adjustmentsPerNode2) ->
                                            Array2D.zip adjustmentsPerNode1 adjustmentsPerNode2
                                            |> Array2D.map
                                                (fun (adjustment1, adjustment2) ->
                                                    //printfn "adjustment1: %A, adjustment2: %A" adjustment1 adjustment2
                                                    {| adjustment = adjustment1.adjustment + adjustment2.adjustment |}))))

                acc
                |> Array.map (
                    Array2D.map (Array2D.map (fun adjustment -> {| adjustment = adjustment.adjustment / (float n) |}))
                )

            let learningRate = 0.1

            let layers =
                cnn.network.layers
                |> Array.Parallel.mapi
                    (fun layerNumber (Layer layer) ->
                        layer
                        |> Array2D.mapi
                            (fun i j (Node node) ->
                                // adjustments.[0] belongs to the filter matrices, hence the + 1.
                                Array2D.zip node adjustments.[layerNumber + 1].[i, j]
                                |> Array2D.mapi
                                    (fun k l (current, adjustment) ->
                                        //let res =
                                        //    current - learningRate * adjustment.adjustment
                                        // AdaGrad-style scaling of the learning rate. Note that,
                                        // unlike textbook AdaGrad, the accumulator sums the raw
                                        // (not squared) adjustments, so sqrt can receive a negative
                                        // value and produce NaN; the NonFinite case then falls back
                                        // to a neutral denominator of 1.
                                        let acc =
                                            match sqrt (sumOfGradients.[(layerNumber, i, j, k, l)] + epsilon) with
                                            | Finite res -> res
                                            | NonFinite -> 1.

                                        let res =
                                            current - (learningRate / acc) * adjustment.adjustment

                                        sumOfGradients.[(layerNumber, i, j, k, l)] <-
                                            sumOfGradients.[(layerNumber, i, j, k, l)]
                                            + adjustment.adjustment

                                        res)
                                |> Node)
                        |> Layer)

            let newNetwork = { layers = layers }

            let newFilters =
                let adjustmentData = adjustments |> Array.head

                cnn.filterMatrices
                |> Array.map (
                    Array2D.mapi
                        (fun i j currentFilterValue ->
                            let adjustment = adjustmentData.[i, j].[0, 0].adjustment
                            //let res =
                            //    currentFilterValue - learningRate * adjustment
                            // The filter accumulator entries are keyed with layer -1.
                            let acc =
                                match sqrt (sumOfGradients.[(-1, i, j, 0, 0)] + epsilon) with
                                | Finite res -> res
                                | NonFinite -> 1.

                            let res =
                                currentFilterValue - (learningRate / acc) * adjustment

                            sumOfGradients.[(-1, i, j, 0, 0)] <- sumOfGradients.[(-1, i, j, 0, 0)] + adjustment
                            res)
                )

            fixit
                { filterMatrices = newFilters
                  network = newNetwork }
                sumOfGradients
                (i + 1)

    let initialSumOfGradients =
        let dict = System.Collections.Generic.Dictionary()

        cnn.network.layers
        |> Array.iteri
            (fun layerNumber (Layer layer) ->
                layer
                |> Array2D.iteri
                    (fun i j (Node node) ->
                        node
                        |> Array2D.iteri (fun k l _ -> dict.[(layerNumber, i, j, k, l)] <- 0.)))

        cnn.filterMatrices
        |> Array.iter (Array2D.iteri (fun i j currentFilterValue -> dict.[(-1, i, j, 0, 0)] <- 0.))

        dict

    fixit cnn initialSumOfGradients 0
let cnnTest () =
    let randomWeight () = random ()

    let pairs =
        let amount = 2

        let expected =
            [ [ [ 0.0 ]; [ 1.0 ] ] |> array2D
              [ [ 1.0 ]; [ 0.0 ] ] |> array2D ]

        [ array2D [ [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
                    [ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
                    [ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ] ],
          expected.[0]
          array2D [ [ 1.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 1.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 1.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ -0.5; 0.; 0.; 0.; 0.; 0.; 0.; -0.5 ]
                    //[ 0.; -0.5; 0.; 0.; 0.; 0.; -0.5; 0. ]
                    //[ 0.; 0.; -0.5; 0.; 0.; -0.5; 0.; 0. ]
                    //[ 0.; 0.; 0.; -0.5; -0.5; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; -0.5; -0.5; 0.; 0.; 0. ]
                    //[ 0.; 0.; -0.5; 0.; 0.; -0.5; 0.; 0. ]
                    //[ 0.; -0.5; 0.; 0.; 0.; 0.; -0.5; 0. ]
                    //[ -0.5; 0.; 0.; 0.; 0.; 0.; 0.; -0.5 ]
                    ],
          expected.[0]
          array2D [ [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
                    [ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
                    [ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
                    [ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
                    //[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
                    //[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
                    //[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
                    //[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
                    //[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
                    //[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
                    //[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
                    //[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
                    ],
          expected.[1] ]
        |> List.toArray
        |> Array.map (fun (values, expected) ->
            values
            |> Array2D.map (fun value -> value - random () * 0.05),
            expected)

    let inputHeight, inputWidth =
        pairs
        |> Array.head
        |> (fun (values, _) -> values |> Array2D.length1, values |> Array2D.length2)

    let filterSize = 3

    let randomFilter _ =
        Array2D.init filterSize filterSize (fun _ _ -> randomWeight ())

    let filters = Array.init 4 randomFilter
    let convolutionStride = 1
    let poolingLayerStride = 2
    let poolingLayerWindowSize = 2

    let network2D_1 =
        { Network2D.layers =
              let layer1X = 4
              let layer1Y = 2

              let poolingLayerHeight =
                  let n = ((inputHeight - filterSize) / convolutionStride) + 1 // seq { 0 .. convolutionStride .. (inputHeight - filterSize) } |> Seq.
                  let m = ((n - poolingLayerWindowSize) / poolingLayerStride) + 1
                  m

              // ((1 + (inputHeight - filterSize) / convolutionStride) / poolingLayerStride)
              let poolingLayerWidth =
                  let n = ((inputWidth - filterSize) / convolutionStride) + 1
                  let m = ((n - poolingLayerWindowSize) / poolingLayerStride) + 1
                  m * filters.Length

              printfn $"{poolingLayerHeight} {poolingLayerWidth}"

              [| Layer
                 <| Array2D.create
                     layer1X
                     layer1Y
                     (Node
                      <| Array2D.init poolingLayerHeight poolingLayerWidth (fun _ _ -> randomWeight ()))
                 Layer
                 <| Array2D.create
                     2
                     1
                     (Node
                      <| Array2D.init layer1X layer1Y (fun _ _ -> randomWeight ())) |] }

    let optimizedNetwork =
        //gradientDescentNN
        //    network2D_1
        //    sigmaFun
        //    pairs
        //    0.08
        gradientDescentCNN2
            { CNN.filterMatrices = filters
              CNN.network = network2D_1 }
            sigmaFun
            pairs
            0.001

    let testSamples =
        [ [ let r () = -random ()
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
            [ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
            [ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            ]
          |> array2D
          |> Array2D.map (function 1.0 -> 1.0 - random () * 0.1 | _ -> random () * 0.1),
          [ [ 0.0 ]; [ 1.0 ] ] |> array2D
          [ [ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
            [ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
            [ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
            [ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            ]
          |> array2D,
          [ [ 1.0 ]; [ 0.0 ] ] |> array2D
          [ [ 1.; 1.; 1.; 1.; 1.; 1.; 0.; 0. ]
            [ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
            [ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
            [ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
            [ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
            [ 1.; 1.; 1.; 1.; 1.; 1.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            ]
          |> array2D,
          [ [ 1.0 ]; [ 0.0 ] ] |> array2D
          [ [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            [ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
            [ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
            [ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
            [ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
            [ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
            [ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            //[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
            ]
          |> array2D
          |> Array2D.map (fun value -> value * random () * 0.5 + 0.5),
          [ [ 1.0 ]; [ 0.0 ] ] |> array2D ]
        |> List.toArray

    testSamples
    |> Array.append pairs
    |> Array.map
        (fun (input, expected) ->
            (calcCNN input optimizedNetwork sigmaFun).activation,
            expected)

cnnTest () |> printfn "%A"