/// Trainable parameters of a two-layer fully connected MNIST model:
/// 784 inputs (presumably flattened 28×28 images) → 30 hidden units → 10 outputs.
struct MNISTParameters : ParameterAggregate {
    /// First-layer weight matrix, initialized from a standard normal distribution.
    var w1: Tensor<Float> = Tensor(randomNormal: [784, 30])
    /// Second-layer weight matrix, initialized from a standard normal distribution.
    var w2: Tensor<Float> = Tensor(randomNormal: [30, 10])

    // The compiler synthesizes the `ParameterAggregate` requirement:
    // static var allKeyPaths: [WritableKeyPath<MNISTParameters, Tensor<Float>>] {
    //   return [\MNISTParameters.w1, \MNISTParameters.w2]
    // }
    // Learn more about key paths here: https://github.com/apple/swift-evolution/blob/master/proposals/0161-key-paths.md
}
        
          |  |  | 
        
          |  | struct AdamOptimizer { | 
        
          |  | typealias Scalar = Float | 
        
          |  |  | 
        
          |  | var learningRate: Scalar | 
        
          |  | var beta1: Scalar | 
        
          |  | var beta2: Scalar | 
        
          |  | var epsilon: Scalar | 
        
          |  |  | 
        
          |  | init(learningRate: Scalar = 0.001, beta1: Scalar = 0.9, beta2: Scalar = 0.999, epsilon: Scalar = 1e-8) { | 
        
          |  | self.learningRate = learningRate | 
        
          |  | self.beta1 = beta1 | 
        
          |  | self.beta2 = beta2 | 
        
          |  | self.epsilon = epsilon | 
        
          |  | } | 
        
          |  |  | 
        
          |  | var step: Float = 0 | 
        
          |  | var firstMoments: MNISTParameters? = nil | 
        
          |  | var secondMoments: MNISTParameters? = nil | 
        
          |  |  | 
        
          |  | // `fitParameters` can be generalized to work with any `ParameterAggregate`-conforming type when such types | 
        
          |  | // define a zero initializer. There are multiple ways to enable this (e.g. conforming `ParameterAggregate` to | 
        
          |  | // `VectorNumeric`). | 
        
          |  | mutating func fitParameters( | 
        
          |  | _ parameters: inout MNISTParameters, | 
        
          |  | withGradients gradients: MNISTParameters | 
        
          |  | ) { | 
        
          |  | func initializeWithZerosIfNeeded(_ x: MNISTParameters?) -> MNISTParameters { | 
        
          |  | return x ?? MNISTParameters( | 
        
          |  | w1: Tensor(0).broadcast(like: parameters.w1), | 
        
          |  | w2: Tensor(0).broadcast(like: parameters.w2) | 
        
          |  | ) | 
        
          |  | } | 
        
          |  |  | 
        
          |  | var firstMoments = initializeWithZerosIfNeeded(self.firstMoments) | 
        
          |  | var secondMoments = initializeWithZerosIfNeeded(self.secondMoments) | 
        
          |  | step += 1 | 
        
          |  |  | 
        
          |  | // Iterating over `allKeyPaths` and applying key paths currently produce sends/receives. | 
        
          |  | // It should be possible to eliminate sends/receives eventually, by fully unrolling the loop at compile-time | 
        
          |  | // and implementing compile-time evaluation of key path initialization and application. | 
        
          |  | // Read the key path design for more information. | 
        
          |  | for kp in MNISTParameters.allKeyPaths { | 
        
          |  | firstMoments[keyPath: kp] = | 
        
          |  | firstMoments[keyPath: kp] * beta1 + (1 - beta1) * gradients[keyPath: kp] | 
        
          |  | secondMoments[keyPath: kp] = | 
        
          |  | firstMoments[keyPath: kp] * beta2 + (1 - beta2) * gradients[keyPath: kp] * gradients[keyPath: kp] | 
        
          |  |  | 
        
          |  | let denominator = sqrt(secondMoments[keyPath: kp]) + epsilon | 
        
          |  | let biasCorrection1 = 1 - pow(beta1, step) | 
        
          |  | let biasCorrection2 = 1 - pow(beta2, step) | 
        
          |  | let stepSize = learningRate * sqrt(biasCorrection2) / biasCorrection1 | 
        
          |  | parameters[keyPath: kp] -= stepSize * firstMoments[keyPath: kp] / denominator | 
        
          |  | } | 
        
          |  |  | 
        
          |  | self.firstMoments = firstMoments | 
        
          |  | self.secondMoments = secondMoments | 
        
          |  | } | 
        
          |  | } |