vojtamolda · January 2, 2023 16:57
diff --git a/!Differentiable Shallow Water PDE Solver.md b/!Differentiable Shallow Water PDE Solver.md
diff --git a/ArrayLoopSolution.swift b/ArrayLoopSolution.swift
 import Foundation
 import Swim


 // MARK: Solution of shallow water equation

 /// Differentiable solution of the shallow water equation on a unit square.
 ///
 /// The shallow water equation is a type of hyperbolic partial differential equation (PDE). This
 /// struct represents its solution calculated with finite-difference discretization on a 2D plane
 /// and at a particular point in time.
 ///
 /// More details about the shallow water PDE can be found for example on
 /// [Wikipedia](https://en.wikipedia.org/wiki/Shallow_water_equations)
 ///
 /// # Domain and Discretization
 /// The PDE is solved on a `<0,1>x<0,1>` square discretized with spatial step of size `Δx`.
 /// The Laplace operator is approximated with five-point stencil finite-differencing.
 ///
 /// Temporal advancing uses a semi-implicit Euler scheme. Time step `Δt` is calculated from
 /// `Δx` to stay below the Courant–Friedrichs–Lewy numerical stability limit.
 ///
 /// # Boundary Conditions
 /// Values around the edges of the domain are subject to trivial Dirichlet boundary conditions
 /// (i.e. equal to 0 with an arbitrary gradient).
 ///
 /// Note that `evolved()` never writes the outermost rows and columns of the grid, so the edge
 /// values are whatever the initial water level contained there.
 ///
 /// # Laplace Operator Δ
 /// Discretization of the operator is implemented as tight loops over the water height field.
 /// This is a very naive but natural implementation that serves as a performance baseline
 /// on the CPU.
 ///
 struct ArrayLoopSolution: ShallowWaterEquationSolution {
    /// Water level height (the surface height `u1` at the current time `t`)
    var waterLevel: [[Float]] { u1 }
    /// Solution time
    var time: Float { t }

    /// Height of the water surface at time `t`
    private var u1: [[Float]]
    /// Height of the water surface at previous time-step `t - Δt`
    private var u0: [[Float]]
    /// Solution time
    @noDerivative private let t: Float
    /// Speed of sound
    @noDerivative private let c: Float = 340.0
    /// Dispersion coefficient
    @noDerivative private let α: Float = 0.001
    /// Number of spatial grid points along each side of the grid (both initializers require
    /// `resolution` rows of `resolution` values)
    @noDerivative private let resolution: Int = 256
    /// Spatial discretization step
    @noDerivative private var Δx: Float { 1 / Float(resolution) }
    /// Time-step calculated to stay below the CFL stability limit
    @noDerivative private var Δt: Float { (sqrt(α * α + Δx * Δx / 3) - α) / c }

    /// Creates initial solution with water level `u0` at time `t`.
    ///
    /// The same field is stored as both the previous and the current water level.
    ///
    /// - Precondition: `u0` has `resolution` rows and every row has `resolution` values.
    @differentiable
    init(waterLevel u0: [[Float]], time t: Float = 0.0) {
        self.u0 = u0
        self.u1 = u0
        self.t = t

        precondition(u0.count == resolution)
        precondition(u0.allSatisfy { $0.count == resolution })
    }

    /// Calculates solution stepped forward by one time-step `Δt`.
    ///
    /// - `u0` - Water surface height at previous time step
    /// - `u1` - Water surface height at current time step
    /// - `u2` - Water surface height at next time step (calculated)
    ///
    /// - Returns: A new solution whose previous level is `u1`, whose current level is `u2`
    ///   and whose time is `t + Δt`.
    @differentiable
    func evolved() -> ArrayLoopSolution {
        // Start from a copy of `u1`. Only interior cells are overwritten below, so the
        // outermost rows and columns of `u2` keep their `u1` values.
        var u2 = u1

        for x in 1 ..< resolution - 1 {
            for y in 1 ..< resolution - 1 {
                // FIXME: Should be u2[x][y] = ...
                // The `update(_:_:to:)` helper stands in for the subscript assignment; see the
                // "Workaround for non-differentiable coroutines" section of this file.
                u2.update(x, y, to:
                            2 * u1[x][y] +
                            (c * c * Δt * Δt + c * α * Δt) * Δ(u1, x, y) -
                            u0[x][y] - c * α * Δt * Δ(u0, x, y)
                )
            }
        }

        return ArrayLoopSolution(u0: u1, u1: u2, t: t + Δt)
    }
    
    /// Constructs intermediate solution with previous water level `u0`, current water level `u1` and time `t`.
    ///
    /// - Precondition: Both `u0` and `u1` have `resolution` rows of `resolution` values.
    @differentiable
    private init(u0: [[Float]], u1: [[Float]], t: Float) {
        self.u0 = u0
        self.u1 = u1
        self.t = t
        
        precondition(u0.count == self.resolution)
        precondition(u0.allSatisfy { $0.count == self.resolution })
        precondition(u1.count == self.resolution)
        precondition(u1.allSatisfy { $0.count == self.resolution })
    }

    /// Applies discretized Laplace operator to scalar field `u` at grid points `x` and `y`.
    ///
    /// Reads the four neighbours `x ± 1` and `y ± 1`, so `(x, y)` must not lie on the
    /// edge of `u`.
    @differentiable
    private func Δ(_ u: [[Float]], _ x: Int, _ y: Int) -> Float {
        (                u[x][y + 1]
        + u[x - 1][y] - (4 * u[x][y]) + u[x + 1][y] +
                         u[x][y - 1]                ) / Δx / Δx
    }
 }


 // MARK: - Cost calculated as mean L2 distance to a target image

 extension ArrayLoopSolution {

    /// Measures how far the current water level is from a `target` grayscale image.
    ///
    /// Every grid cell contributes its squared difference to the image, weighted by `Δx * Δx`.
    ///
    /// - Parameter target: Image whose width and height both equal `resolution`.
    /// - Returns: The accumulated weighted squared error.
    @differentiable
    func meanSquaredError(to target: Swim.Image<Gray, Float>) -> Float {
        precondition(target.width == resolution)
        precondition(target.height == resolution)

        var accumulated: Float = 0.0
        for i in 0 ..< resolution {
            for j in 0 ..< resolution {
                let residual = target[i, j][.gray] - u1[i][j]
                accumulated += residual * residual * Δx * Δx
            }
        }
        return accumulated
    }

 }


 // MARK: - Workaround for non-differentiable coroutines
 // https://bugs.swift.org/browse/TF-1078
 // https://bugs.swift.org/browse/TF-1080

 fileprivate extension Array where Element == [Float] {

    /// Stores `value` at position `[x][y]` of the nested array.
    ///
    /// The assignment is performed inside a `withoutDerivative(at:)` closure; the derivative
    /// of this method is supplied by `vjpUpdate(_:_:to:)` below.
    @differentiable(wrt: (self, value))
    mutating func update(_ x: Int, _ y: Int, to value: Float) {
        // The closure's `nil` result exists only to give the closure a return value; it is discarded.
        let _ = withoutDerivative(at: (value)) { value -> Int? in
            self[x][y] = value
            return nil
        }
    }

    /// Custom derivative of `update(_:_:to:)` with respect to `self` and `value`.
    ///
    /// Performs the same update, then returns a pullback that hands the tangent stored at
    /// `[x][y]` back as the gradient of `value` and resets that tangent entry to zero.
    @derivative(of: update, wrt: (self, value))
    mutating func vjpUpdate(_ x: Int, _ y: Int, to value: Float) ->
            (value: (), pullback: (inout Array<[Float]>.TangentVector) -> Float) {
        
        self.update(x, y, to: value)

        func pullback(`self`: inout Array<[Float]>.TangentVector) -> Float {
            // Read the tangent that arrived at the overwritten element...
            let `value` = `self`[x][y]
            // ...and clear it, since the element was overwritten by `value`.
            `self`[x][y] = Float(0)
            return `value`
        }
        return ((), pullback)
    }
 }
diff --git a/Benchmarks.ipynb b/Benchmarks.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Benchmarks.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true
    },
    "kernelspec": {
      "name": "swift",
      "display_name": "Swift"
    },
    "nteract": {
      "version": "0.23.3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "r67aNfYc255C",
        "colab_type": "text"
      },
      "source": [
        "# Differentiable Shallow Water PDE Solver - Benchmarks\n",
        "\n",
        "\n",
        "[![GitHubBadge]][GitHubLink] [![ColabBadge]][ColabLink]\n",
        "\n",
        "\n",
        "[ColabBadge]: https://colab.research.google.com/assets/colab-badge.svg \"Run notebook in Google Colab\"\n",
        "[ColabLink]: https://colab.research.google.com/gist/vojtamolda/bd85033cf62877e6f8ada68b8bbb32a0/Benchmarks.ipynb\n",
        "\n",
        "[GitHubBadge]: https://img.shields.io/badge/|-Edit_on_GitHub-green.svg?logo=github \"Edit notebook's source code on GitHub\"\n",
        "[GitHubLink]: https://gist.github.com/vojtamolda/bd85033cf62877e6f8ada68b8bbb32a0#file-benchmarks-ipynb"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kZRlD4utdPuX",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "2ca5e338-1294-4c59-936c-1b8557c3178f"
      },
      "source": [
        "// Clone Gist\n",
        "%system rm --recursive --force Solver\n",
        "%system git clone https://gist.github.com/bd85033cf62877e6f8ada68b8bbb32a0.git Solver\n",
        "\n",
        "// Install Packages\n",
        "%install-swiftpm-flags -c release\n",
        "%install-location /swift/packages\n",
        "\n",
        "%install '.package(url: \"https://github.com/google/swift-benchmark\", .branch(\"master\"))' Benchmark\n",
        "%install '.package(url: \"https://github.com/vojtamolda/Plotly.swift.git\", from: \"0.3.1\")' Plotly\n",
        "%install '.package(url: \"https://github.com/t-ae/Swim.git\", from: \"3.9.0\")' Swim\n",
        "\n",
        "// Clear Output\n",
        "print(\"\\u{001B}[2J\")"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\r\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TSrcgMtVubRa",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import TensorFlow\n",
        "import Benchmark"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-DPUrPuNtIYW",
        "colab_type": "text"
      },
      "source": [
        "## Differentiable PDE Solver\n",
        "\n",
        "The next cell imports four different implementations of the differentiable shallow water PDE solver from a cloned git repository. The full source code and a readme file can be found in [this GitHub gist](https://gist.github.com/bd85033cf62877e6f8ada68b8bbb32a0.git).\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OmiNmB4e2z4h",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "%include \"Solver/Solution.swift\"\n",
        "%include \"Solver/ArrayLoopSolution.swift\"\n",
        "%include \"Solver/TensorLoopSolution.swift\"\n",
        "%include \"Solver/TensorSliceSolution.swift\"\n",
        "%include \"Solver/TensorConvSolution.swift\""
      ],
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3TAhpc4D3ZQl",
        "colab_type": "text"
      },
      "source": [
        "## Benchmarks\n",
        "\n",
        "The following code runs a simulation of a water surface behavior in a rectangular bathtub. There's an initial \"splash\" at the beginning. The splash generates surface gravity waves that propagate away from the center and reflect off the domain walls. There are four different versions, one for each implementation of the solver.\n",
        "\n",
        "Implementations that use the `Tensor` type for numerical values also accept the `device` argument. This allows them to run with XLA acceleration."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "id": "N2ZR02DGpKTn",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "let n = 256\n",
        "let duration = 512"
      ],
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LqDnHRBGHwbX",
        "colab_type": "text"
      },
      "source": [
        "#### A - `ArrayLoopSolution`"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "G3ibX3LSt38i",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "func splashArrayLoop() {\n",
        "    var initialWaterLevel = [[Float]](repeating: [Float](repeating: 0.0, count: n), count: n)\n",
        "    initialWaterLevel[n / 2][n / 2] = 100\n",
        "\n",
        "    let initialSolution = ArrayLoopSolution(waterLevel: initialWaterLevel)\n",
        "    _ = [ArrayLoopSolution](evolve: initialSolution, for: duration)\n",
        "}"
      ],
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "qV01lB6KH1dC",
        "colab_type": "text"
      },
      "source": [
        "#### B - `TensorLoopSolution`"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UrhjMlGy3xeG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "func splashTensorLoop(on device: Device) {\n",
        "    var initialWaterLevel = Tensor<Float>(zeros: [n, n], on: device)\n",
        "    initialWaterLevel[n / 2][n / 2] = Tensor<Float>(100, on: device)\n",
        "\n",
        "    let initialSolution = TensorLoopSolution(waterLevel: initialWaterLevel)\n",
        "    _ = [TensorLoopSolution](evolve: initialSolution, for: duration)\n",
        "}"
      ],
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "o5sFqQCkH45N",
        "colab_type": "text"
      },
      "source": [
        "#### C - `TensorSliceSolution`"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tCHD-P3wgGN-",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "func splashTensorSlice(on device: Device) {\n",
        "    var initialWaterLevel = Tensor<Float>(zeros: [n, n], on: device)\n",
        "    initialWaterLevel[n / 2][n / 2] = Tensor<Float>(100, on: device)\n",
        "\n",
        "    let initialSolution = TensorSliceSolution(waterLevel: initialWaterLevel)\n",
        "    _ = [TensorSliceSolution](evolve: initialSolution, for: duration)\n",
        "}"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mbVZjvJoNjFD",
        "colab_type": "text"
      },
      "source": [
        "#### D - `TensorConvSolution`"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KiBDfV0XNkZP",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "func splashTensorConv(on device: Device) {\n",
        "    var initialWaterLevel = Tensor<Float>(zeros: [n, n], on: device)\n",
        "    initialWaterLevel[n / 2][n / 2] = Tensor<Float>(100, on: device)\n",
        "\n",
        "    let initialSolution = TensorConvSolution(waterLevel: initialWaterLevel)\n",
        "    _ = [TensorConvSolution](evolve: initialSolution, for: duration)\n",
        "}"
      ],
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "MKYrWCE5s3ob",
        "colab_type": "text"
      },
      "source": [
        "## Results\n",
        "\n",
        "Not yet conclusive..."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FfA4OKIPgtQx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "let splashBenchmarks = BenchmarkSuite(name: \"Shallow Water PDE Solver\",\n",
        "                                      settings: Iterations(10), WarmupIterations(2)) { suite in\n",
        "    suite.benchmark(\"Array Loop\") { splashArrayLoop() }\n",
        "\n",
        "    // This is at least 1000x slower. One can easily grow old while running the benchmark :(\n",
        "    //suite.benchmark(\"Tensor Loop\") { splashTensorLoop(on: Device.default) }\n",
        "    //suite.benchmark(\"Tensor Loop (XLA)\") { splashTensorLoop(on: Device.defaultXLA) }\n",
        "\n",
        "    suite.benchmark(\"Tensor Slice\") { splashTensorSlice(on: Device.default) }\n",
        "    suite.benchmark(\"Tensor Slice (XLA)\") { splashTensorSlice(on: Device.defaultXLA) }\n",
        "\n",
        "    suite.benchmark(\"Tensor Conv\") { splashTensorConv(on: Device.default) }\n",
        "    suite.benchmark(\"Tensor Conv (XLA)\") { splashTensorConv(on: Device.defaultXLA) }\n",
        "}"
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Uhq7IKkLcw25",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 238
        },
        "outputId": "208b1369-359b-4f58-c806-b8e815f6631b"
      },
      "source": [
        "Benchmark.main([splashBenchmarks])"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "running Shallow Water PDE Solver: Array Loop... done! (363294.84 ms)\n",
            "running Shallow Water PDE Solver: Tensor Slice... done! (9566.95 ms)\n",
            "running Shallow Water PDE Solver: Tensor Slice (XLA)... done! (17147.61 ms)\n",
            "running Shallow Water PDE Solver: Tensor Conv... done! (83571.66 ms)\n",
            "running Shallow Water PDE Solver: Tensor Conv (XLA)... done! (19268.14 ms)\n",
            "\n",
            "name                                        time               std        iterations warmup        \n",
            "---------------------------------------------------------------------------------------------------\n",
            "Shallow Water PDE Solver.Array Loop         30268327673.500 ns ±   0.40 %         10 60451444387 ns\n",
            "Shallow Water PDE Solver.Tensor Slice         741456533.500 ns ±   4.13 %         10  2012324321 ns\n",
            "Shallow Water PDE Solver.Tensor Slice (XLA)  1359265173.500 ns ±   2.19 %         10  3534559405 ns\n",
            "Shallow Water PDE Solver.Tensor Conv         7026560678.500 ns ±   2.92 %         10 13381071502 ns\n",
            "Shallow Water PDE Solver.Tensor Conv (XLA)   1580760053.500 ns ±   1.52 %         10  3472683097 ns\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
 }
diff --git a/CLoopSolution.c b/CLoopSolution.c
 #include <math.h>
 #include <time.h>
 #include <stdio.h>
 #include <stdlib.h>


 /* Number of grid points along each side of the square grid. */
 const int n = 256;
 /* Number of time steps simulated by one splash() run. */
 const int duration = 512;

 /* Wave speed used by the update formula in timestep(). */
 const float c = 340.0f;
 /* Dispersion coefficient used by the update formula in timestep().
  * NOTE(review): the Swift ArrayLoopSolution in this gist uses 0.001 for the
  * same coefficient -- confirm which value is intended. */
 const float alpha = 0.00001f;


 /* Maps grid coordinates (x, y) to an offset into a flat array of n * n cells.
  * Cells that share the same y are stored next to each other. */
 int idx(const int x, const int y) {
    const int row_start = y * n;
    return row_start + x;
 }

 /* Five-point finite-difference Laplacian of the field u at cell (x, y).
  * Reads the four direct neighbours, so (x, y) must not lie on the grid edge. */
 float laplace(const float* const u, const int x, const int y) {
    const float dx = 1.0f / n;

    const float center = u[idx(x, y)];
    const float right  = u[idx(x + 1, y)];
    const float left   = u[idx(x - 1, y)];
    const float upper  = u[idx(x, y + 1)];
    const float lower  = u[idx(x, y - 1)];

    /* Same summation order as before: centre term first, then the neighbours. */
    return (-4 * center + right + left + upper + lower) / dx / dx;
 }

 /* Advances the solution by one time step.
  *
  *   u  - output: water level at the next time step (interior cells only)
  *   u1 - water level at the current time step
  *   u0 - water level at the previous time step
  *
  * Only cells with 1 <= x, y <= n-2 are written; the outermost rows and columns
  * of u are left untouched. */
 void timestep(float* u, const float* const u1, const float* const u0) {
    const float dx = 1.0f / n;
    /* sqrtf keeps the whole computation in single precision, matching the
     * Float arithmetic of the Swift reference implementation. */
    const float dt = (sqrtf(alpha * alpha + dx * dx / 3.0f) - alpha) / c;

    /* Coefficients of the two Laplacian terms; they do not depend on the cell. */
    const float k1 = c * c * dt * dt + c * alpha * dt;
    const float k0 = c * alpha * dt;

    for (int x = 1; x < n-1; x++) {
        for (int y = 1; y < n-1; y++) {
            u[idx(x,y)] = 2 * u1[idx(x,y)] \
                          + k1 * laplace(u1, x, y) \
                          - u0[idx(x,y)] - k0 * laplace(u0, x, y);
        }
    }
 }

 /* Runs one complete simulation: a single raised cell in the middle of the grid
  * is evolved for `duration` time steps. The result is discarded; the function
  * exists to be timed by main().
  *
  * Terminates the program with EXIT_FAILURE if the grids cannot be allocated. */
 void splash(void) {
    /* Compute the cell count in size_t so the product is not formed in int. */
    const size_t cells = (size_t) n * (size_t) n;

    float* u = (float*) calloc(cells, sizeof(float));
    float* u1 = (float*) calloc(cells, sizeof(float));
    float* u0 = (float*) calloc(cells, sizeof(float));

    if (u == NULL || u1 == NULL || u0 == NULL) {
        fprintf(stderr, "splash: failed to allocate the solution grids\n");
        /* free(NULL) is a no-op, so releasing all three is safe. */
        free(u);
        free(u1);
        free(u0);
        exit(EXIT_FAILURE);
    }

    /* The current and previous levels start out identical. */
    u1[idx(n/2, n/2)] = 100.0f;
    u0[idx(n/2, n/2)] = 100.0f;

    for (int t = 0; t < duration; t++) {
        timestep(u, u1, u0);

        /* Rotate the buffers: current becomes previous, next becomes current,
         * and the old previous buffer is reused as scratch for the next step. */
        float* tmp = u0;
        u0 = u1;
        u1 = u;
        u = tmp;
    }

    free(u);
    free(u1);
    free(u0);
 }

 /* Benchmark driver: two untimed warm-up runs of splash(), then ten timed runs.
  * Each timed run prints its duration, measured with clock(), in milliseconds. */
 int main(int argc, const char * argv[]) {
    int run;

    printf("Warmup...\n");
    for (run = 0; run < 2; run++) {
        splash();
    }

    printf("Benchmarks...\n");
    run = 0;
    while (run < 10) {
        const clock_t begin = clock();
        splash();
        const clock_t ticks = clock() - begin;

        long double elapsed_ms = (long double)ticks * 1000.0 / CLOCKS_PER_SEC;
        printf("%Lf ms\n", elapsed_ms);

        run++;
    }

    return 0;
 }
diff --git a/Optimization.gif b/Optimization.gif
diff --git a/Showcase.ipynb b/Showcase.ipynb
diff --git a/Solution.swift b/Solution.swift
diff --git a/Splash.gif b/Splash.gif
diff --git a/Target.png b/Target.png
diff --git a/TensorConvSolution.swift b/TensorConvSolution.swift
diff --git a/TensorLoopSolution.swift b/TensorLoopSolution.swift
diff --git a/TensorSliceSolution.swift b/TensorSliceSolution.swift
diff --git a/Visualization.swift b/Visualization.swift
	import Foundation
	import Swim


	// MARK: Solution of shallow water equation

	/// Differentiable solution of the shallow water equation on a unit square.
	///
	/// The shallow water equation is a type of hyperbolic partial differential equation (PDE). This
	/// struct represents its solution calculated with finite-difference discretization on a 2D plane
	/// and at a particular point in time.
	///
	/// More details about the shallow water PDE can be found for example on
	/// [Wikipedia](https://en.wikipedia.org/wiki/Shallow_water_equations)
	///
	/// # Domain and Discretization
	/// The PDE is solved on a `<0,1>x<0,1>` square discretized with spatial step of size `Δx`.
	/// The Laplace operator is approximated with five-point stencil finite-differencing.
	///
	/// Temporal advancing uses a semi-implicit Euler scheme. Time step `Δt` is calculated from
	/// `Δx` to stay below the Courant–Friedrichs–Lewy numerical stability limit.
	///
	/// # Boundary Conditions
	/// Values around the edges of the domain are subject to trivial Dirichlet boundary conditions
	/// (i.e. equal to 0 with an arbitrary gradient).
	///
	/// Note that `evolved()` never writes the outermost rows and columns of the grid, so the edge
	/// values are whatever the initial water level contained there.
	///
	/// # Laplace Operator Δ
	/// Discretization of the operator is implemented as tight loops over the water height field.
	/// This is a very naive but natural implementation that serves as a performance baseline
	/// on the CPU.
	///
	struct ArrayLoopSolution: ShallowWaterEquationSolution {
	/// Water level height (the surface height `u1` at the current time `t`)
	var waterLevel: [[Float]] { u1 }
	/// Solution time
	var time: Float { t }

	/// Height of the water surface at time `t`
	private var u1: [[Float]]
	/// Height of the water surface at previous time-step `t - Δt`
	private var u0: [[Float]]
	/// Solution time
	@noDerivative private let t: Float
	/// Speed of sound
	@noDerivative private let c: Float = 340.0
	/// Dispersion coefficient
	@noDerivative private let α: Float = 0.001
	/// Number of spatial grid points along each side of the grid (both initializers require
	/// `resolution` rows of `resolution` values)
	@noDerivative private let resolution: Int = 256
	/// Spatial discretization step
	@noDerivative private var Δx: Float { 1 / Float(resolution) }
	/// Time-step calculated to stay below the CFL stability limit
	@noDerivative private var Δt: Float { (sqrt(α * α + Δx * Δx / 3) - α) / c }

	/// Creates initial solution with water level `u0` at time `t`.
	///
	/// The same field is stored as both the previous and the current water level.
	///
	/// - Precondition: `u0` has `resolution` rows and every row has `resolution` values.
	@differentiable
	init(waterLevel u0: [[Float]], time t: Float = 0.0) {
	self.u0 = u0
	self.u1 = u0
	self.t = t

	precondition(u0.count == resolution)
	precondition(u0.allSatisfy { $0.count == resolution })
	}

	/// Calculates solution stepped forward by one time-step `Δt`.
	///
	/// - `u0` - Water surface height at previous time step
	/// - `u1` - Water surface height at current time step
	/// - `u2` - Water surface height at next time step (calculated)
	///
	/// - Returns: A new solution whose previous level is `u1`, whose current level is `u2`
	///   and whose time is `t + Δt`.
	@differentiable
	func evolved() -> ArrayLoopSolution {
	// Start from a copy of `u1`. Only interior cells are overwritten below, so the
	// outermost rows and columns of `u2` keep their `u1` values.
	var u2 = u1

	for x in 1 ..< resolution - 1 {
	for y in 1 ..< resolution - 1 {
	// FIXME: Should be u2[x][y] = ...
	// The `update(_:_:to:)` helper stands in for the subscript assignment; see the
	// "Workaround for non-differentiable coroutines" section of this file.
	u2.update(x, y, to:
	2 * u1[x][y] +
	(c * c * Δt * Δt + c * α * Δt) * Δ(u1, x, y) -
	u0[x][y] - c * α * Δt * Δ(u0, x, y)
	)
	}
	}

	return ArrayLoopSolution(u0: u1, u1: u2, t: t + Δt)
	}

	/// Constructs intermediate solution with previous water level `u0`, current water level `u1` and time `t`.
	///
	/// - Precondition: Both `u0` and `u1` have `resolution` rows of `resolution` values.
	@differentiable
	private init(u0: [[Float]], u1: [[Float]], t: Float) {
	self.u0 = u0
	self.u1 = u1
	self.t = t

	precondition(u0.count == self.resolution)
	precondition(u0.allSatisfy { $0.count == self.resolution })
	precondition(u1.count == self.resolution)
	precondition(u1.allSatisfy { $0.count == self.resolution })
	}

	/// Applies discretized Laplace operator to scalar field `u` at grid points `x` and `y`.
	///
	/// Reads the four neighbours `x ± 1` and `y ± 1`, so `(x, y)` must not lie on the
	/// edge of `u`.
	@differentiable
	private func Δ(_ u: [[Float]], _ x: Int, _ y: Int) -> Float {
	( u[x][y + 1]
	+ u[x - 1][y] - (4 * u[x][y]) + u[x + 1][y] +
	u[x][y - 1] ) / Δx / Δx
	}
	}


	// MARK: - Cost calculated as mean L2 distance to a target image

	extension ArrayLoopSolution {

	    /// Measures how far the current water level is from a `target` grayscale image.
	    ///
	    /// Every grid cell contributes its squared difference to the image, weighted by `Δx * Δx`.
	    ///
	    /// - Parameter target: Image whose width and height both equal `resolution`.
	    /// - Returns: The accumulated weighted squared error.
	    @differentiable
	    func meanSquaredError(to target: Swim.Image<Gray, Float>) -> Float {
	        precondition(target.width == resolution)
	        precondition(target.height == resolution)

	        var accumulated: Float = 0.0
	        for i in 0 ..< resolution {
	            for j in 0 ..< resolution {
	                let residual = target[i, j][.gray] - u1[i][j]
	                accumulated += residual * residual * Δx * Δx
	            }
	        }
	        return accumulated
	    }

	}


	// MARK: - Workaround for non-differentiable coroutines
	// https://bugs.swift.org/browse/TF-1078
	// https://bugs.swift.org/browse/TF-1080

	fileprivate extension Array where Element == [Float] {

	/// Stores `value` at position `[x][y]` of the nested array.
	///
	/// The assignment is performed inside a `withoutDerivative(at:)` closure; the derivative
	/// of this method is supplied by `vjpUpdate(_:_:to:)` below.
	@differentiable(wrt: (self, value))
	mutating func update(_ x: Int, _ y: Int, to value: Float) {
	// The closure's `nil` result exists only to give the closure a return value; it is discarded.
	let _ = withoutDerivative(at: (value)) { value -> Int? in
	self[x][y] = value
	return nil
	}
	}

	/// Custom derivative of `update(_:_:to:)` with respect to `self` and `value`.
	///
	/// Performs the same update, then returns a pullback that hands the tangent stored at
	/// `[x][y]` back as the gradient of `value` and resets that tangent entry to zero.
	@derivative(of: update, wrt: (self, value))
	mutating func vjpUpdate(_ x: Int, _ y: Int, to value: Float) ->
	(value: (), pullback: (inout Array<[Float]>.TangentVector) -> Float) {

	self.update(x, y, to: value)

	func pullback(`self`: inout Array<[Float]>.TangentVector) -> Float {
	// Read the tangent that arrived at the overwritten element...
	let `value` = `self`[x][y]
	// ...and clear it, since the element was overwritten by `value`.
	`self`[x][y] = Float(0)
	return `value`
	}
	return ((), pullback)
	}
	}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Benchmarks.ipynb",
	"provenance": [],
	"collapsed_sections": [],
	"toc_visible": true
	},
	"kernelspec": {
	"name": "swift",
	"display_name": "Swift"
	},
	"nteract": {
	"version": "0.23.3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "r67aNfYc255C",
	"colab_type": "text"
	},
	"source": [
	"# Differentiable Shallow Water PDE Solver - Benchmarks\n",
	"\n",
	"\n",
	"[![GitHubBadge]][GitHubLink] [![ColabBadge]][ColabLink]\n",
	"\n",
	"\n",
	"[ColabBadge]: https://colab.research.google.com/assets/colab-badge.svg \"Run notebook in Google Colab\"\n",
	"[ColabLink]: https://colab.research.google.com/gist/vojtamolda/bd85033cf62877e6f8ada68b8bbb32a0/Benchmarks.ipynb\n",
	"\n",
	"[GitHubBadge]: https://img.shields.io/badge/|-Edit_on_GitHub-green.svg?logo=github \"Edit notebook's source code on GitHub\"\n",
	"[GitHubLink]: https://gist.github.com/vojtamolda/bd85033cf62877e6f8ada68b8bbb32a0#file-benchmarks-ipynb"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "kZRlD4utdPuX",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "2ca5e338-1294-4c59-936c-1b8557c3178f"
	},
	"source": [
	"// Clone Gist\n",
	"%system rm --recursive --force Solver\n",
	"%system git clone https://gist.github.com/bd85033cf62877e6f8ada68b8bbb32a0.git Solver\n",
	"\n",
	"// Install Packages\n",
	"%install-swiftpm-flags -c release\n",
	"%install-location /swift/packages\n",
	"\n",
	"%install '.package(url: \"https://github.com/google/swift-benchmark\", .branch(\"master\"))' Benchmark\n",
	"%install '.package(url: \"https://github.com/vojtamolda/Plotly.swift.git\", from: \"0.3.1\")' Plotly\n",
	"%install '.package(url: \"https://github.com/t-ae/Swim.git\", from: \"3.9.0\")' Swim\n",
	"\n",
	"// Clear Output\n",
	"print(\"\\u{001B}[2J\")"
	],
	"execution_count": 1,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"\r\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "TSrcgMtVubRa",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"import TensorFlow\n",
	"import Benchmark"
	],
	"execution_count": 2,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "-DPUrPuNtIYW",
	"colab_type": "text"
	},
	"source": [
	"## Differentiable PDE Solver\n",
	"\n",
	"The next cell imports four different implementations of the differentiable shallow water PDE solver from a cloned git repository. The full source code and a readme file can be found in [this GitHub gist](https://gist.github.com/bd85033cf62877e6f8ada68b8bbb32a0.git).\n"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "OmiNmB4e2z4h",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"%include \"Solver/Solution.swift\"\n",
	"%include \"Solver/ArrayLoopSolution.swift\"\n",
	"%include \"Solver/TensorLoopSolution.swift\"\n",
	"%include \"Solver/TensorSliceSolution.swift\"\n",
	"%include \"Solver/TensorConvSolution.swift\""
	],
	"execution_count": 3,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "3TAhpc4D3ZQl",
	"colab_type": "text"
	},
	"source": [
	"## Benchmarks\n",
	"\n",
	"The following code runs a simulation of a water surface behavior in a rectangular bathtub. There's an initial \"splash\" at the beginning. The splash generates surface gravity waves that propagate away from the center and reflect off the domain walls. There are four different versions, one for each implementation of the solver.\n",
	"\n",
	"Implementations that use the `Tensor` type for numerical values also accept the `device` argument. This allows them to run with XLA acceleration."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"jupyter": {
	"source_hidden": false,
	"outputs_hidden": false
	},
	"nteract": {
	"transient": {
	"deleting": false
	}
	},
	"id": "N2ZR02DGpKTn",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"let n = 256\n",
	"let duration = 512"
	],
	"execution_count": 4,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "LqDnHRBGHwbX",
	"colab_type": "text"
	},
	"source": [
	"#### A - `ArrayLoopSolution`"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "G3ibX3LSt38i",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"func splashArrayLoop() {\n",
	" var initialWaterLevel = [[Float]](repeating: [Float](repeating: 0.0, count: n), count: n)\n",
	" initialWaterLevel[n / 2][n / 2] = 100\n",
	"\n",
	" let initialSolution = ArrayLoopSolution(waterLevel: initialWaterLevel)\n",
	" _ = [ArrayLoopSolution](evolve: initialSolution, for: duration)\n",
	"}"
	],
	"execution_count": 5,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "qV01lB6KH1dC",
	"colab_type": "text"
	},
	"source": [
	"#### B - `TensorLoopSolution`"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "UrhjMlGy3xeG",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"func splashTensorLoop(on device: Device) {\n",
	" var initialWaterLevel = Tensor<Float>(zeros: [n, n], on: device)\n",
	" initialWaterLevel[n / 2][n / 2] = Tensor<Float>(100, on: device)\n",
	"\n",
	" let initialSolution = TensorLoopSolution(waterLevel: initialWaterLevel)\n",
	" _ = [TensorLoopSolution](evolve: initialSolution, for: duration)\n",
	"}"
	],
	"execution_count": 6,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "o5sFqQCkH45N",
	"colab_type": "text"
	},
	"source": [
	"#### C - `TensorSliceSolution`"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "tCHD-P3wgGN-",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"func splashTensorSlice(on device: Device) {\n",
	" var initialWaterLevel = Tensor<Float>(zeros: [n, n], on: device)\n",
	" initialWaterLevel[n / 2][n / 2] = Tensor<Float>(100, on: device)\n",
	"\n",
	" let initialSolution = TensorSliceSolution(waterLevel: initialWaterLevel)\n",
	" _ = [TensorSliceSolution](evolve: initialSolution, for: duration)\n",
	"}"
	],
	"execution_count": 7,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "mbVZjvJoNjFD",
	"colab_type": "text"
	},
	"source": [
	"#### D - `TensorConvSolution`"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "KiBDfV0XNkZP",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"func splashTensorConv(on device: Device) {\n",
	" var initialWaterLevel = Tensor<Float>(zeros: [n, n], on: device)\n",
	" initialWaterLevel[n / 2][n / 2] = Tensor<Float>(100, on: device)\n",
	"\n",
	" let initialSolution = TensorConvSolution(waterLevel: initialWaterLevel)\n",
	" _ = [TensorConvSolution](evolve: initialSolution, for: duration)\n",
	"}"
	],
	"execution_count": 8,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "MKYrWCE5s3ob",
	"colab_type": "text"
	},
	"source": [
	"## Results\n",
	"\n",
	"Not yet conclusive..."
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "FfA4OKIPgtQx",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"let splashBenchmarks = BenchmarkSuite(name: \"Shallow Water PDE Solver\",\n",
	" settings: Iterations(10), WarmupIterations(2)) { suite in\n",
	" suite.benchmark(\"Array Loop\") { splashArrayLoop() }\n",
	"\n",
	" // This is at least 1000x slower. One can easily grow old while running the benchmark :(\n",
	" //suite.benchmark(\"Tensor Loop\") { splashTensorLoop(on: Device.default) }\n",
	" //suite.benchmark(\"Tensor Loop (XLA)\") { splashTensorLoop(on: Device.defaultXLA) }\n",
	"\n",
	" suite.benchmark(\"Tensor Slice\") { splashTensorSlice(on: Device.default) }\n",
	" suite.benchmark(\"Tensor Slice (XLA)\") { splashTensorSlice(on: Device.defaultXLA) }\n",
	"\n",
	" suite.benchmark(\"Tensor Conv\") { splashTensorConv(on: Device.default) }\n",
	" suite.benchmark(\"Tensor Conv (XLA)\") { splashTensorConv(on: Device.defaultXLA) }\n",
	"}"
	],
	"execution_count": 9,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "Uhq7IKkLcw25",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 238
	},
	"outputId": "208b1369-359b-4f58-c806-b8e815f6631b"
	},
	"source": [
	"Benchmark.main([splashBenchmarks])"
	],
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"running Shallow Water PDE Solver: Array Loop... done! (363294.84 ms)\n",
	"running Shallow Water PDE Solver: Tensor Slice... done! (9566.95 ms)\n",
	"running Shallow Water PDE Solver: Tensor Slice (XLA)... done! (17147.61 ms)\n",
	"running Shallow Water PDE Solver: Tensor Conv... done! (83571.66 ms)\n",
	"running Shallow Water PDE Solver: Tensor Conv (XLA)... done! (19268.14 ms)\n",
	"\n",
	"name time std iterations warmup \n",
	"---------------------------------------------------------------------------------------------------\n",
	"Shallow Water PDE Solver.Array Loop 30268327673.500 ns ± 0.40 % 10 60451444387 ns\n",
	"Shallow Water PDE Solver.Tensor Slice 741456533.500 ns ± 4.13 % 10 2012324321 ns\n",
	"Shallow Water PDE Solver.Tensor Slice (XLA) 1359265173.500 ns ± 2.19 % 10 3534559405 ns\n",
	"Shallow Water PDE Solver.Tensor Conv 7026560678.500 ns ± 2.92 % 10 13381071502 ns\n",
	"Shallow Water PDE Solver.Tensor Conv (XLA) 1580760053.500 ns ± 1.52 % 10 3472683097 ns\n"
	],
	"name": "stdout"
	}
	]
	}
	]
	}
	#include <math.h>
	#include <time.h>
	#include <stdio.h>
	#include <stdlib.h>


	/* Number of grid points along each side of the square grid. */
	const int n = 256;
	/* Number of time steps simulated by one splash() run. */
	const int duration = 512;

	/* Wave speed used by the update formula in timestep(). */
	const float c = 340.0f;
	/* Dispersion coefficient used by the update formula in timestep().
	 * NOTE(review): the Swift ArrayLoopSolution in this gist uses 0.001 for the
	 * same coefficient -- confirm which value is intended. */
	const float alpha = 0.00001f;


	/* Maps grid coordinates (x, y) to an offset into a flat array of n * n cells.
	 * Cells that share the same y are stored next to each other. */
	int idx(const int x, const int y) {
	    const int row_start = y * n;
	    return row_start + x;
	}

	/* Five-point finite-difference Laplacian of the field u at cell (x, y).
	 * Reads the four direct neighbours, so (x, y) must not lie on the grid edge. */
	float laplace(const float* const u, const int x, const int y) {
	    const float dx = 1.0f / n;

	    const float center = u[idx(x, y)];
	    const float right  = u[idx(x + 1, y)];
	    const float left   = u[idx(x - 1, y)];
	    const float upper  = u[idx(x, y + 1)];
	    const float lower  = u[idx(x, y - 1)];

	    /* Same summation order as before: centre term first, then the neighbours. */
	    return (-4 * center + right + left + upper + lower) / dx / dx;
	}

	/* Advances the solution by one time step.
	 *
	 *   u  - output: water level at the next time step (interior cells only)
	 *   u1 - water level at the current time step
	 *   u0 - water level at the previous time step
	 *
	 * Only cells with 1 <= x, y <= n-2 are written; the outermost rows and columns
	 * of u are left untouched. */
	void timestep(float* u, const float* const u1, const float* const u0) {
	    const float dx = 1.0f / n;
	    /* sqrtf keeps the whole computation in single precision, matching the
	     * Float arithmetic of the Swift reference implementation. */
	    const float dt = (sqrtf(alpha * alpha + dx * dx / 3.0f) - alpha) / c;

	    /* Coefficients of the two Laplacian terms; they do not depend on the cell. */
	    const float k1 = c * c * dt * dt + c * alpha * dt;
	    const float k0 = c * alpha * dt;

	    for (int x = 1; x < n-1; x++) {
	        for (int y = 1; y < n-1; y++) {
	            u[idx(x,y)] = 2 * u1[idx(x,y)] \
	                          + k1 * laplace(u1, x, y) \
	                          - u0[idx(x,y)] - k0 * laplace(u0, x, y);
	        }
	    }
	}

	/* Runs one complete simulation: a single raised cell in the middle of the grid
	 * is evolved for `duration` time steps. The result is discarded; the function
	 * exists to be timed by main().
	 *
	 * Terminates the program with EXIT_FAILURE if the grids cannot be allocated. */
	void splash(void) {
	    /* Compute the cell count in size_t so the product is not formed in int. */
	    const size_t cells = (size_t) n * (size_t) n;

	    /* calloc returns a pointer, so the cast must be to float*, and the grid
	     * holds n * n cells. */
	    float* u = (float*) calloc(cells, sizeof(float));
	    float* u1 = (float*) calloc(cells, sizeof(float));
	    float* u0 = (float*) calloc(cells, sizeof(float));

	    if (u == NULL || u1 == NULL || u0 == NULL) {
	        fprintf(stderr, "splash: failed to allocate the solution grids\n");
	        /* free(NULL) is a no-op, so releasing all three is safe. */
	        free(u);
	        free(u1);
	        free(u0);
	        exit(EXIT_FAILURE);
	    }

	    /* The current and previous levels start out identical. */
	    u1[idx(n/2, n/2)] = 100.0f;
	    u0[idx(n/2, n/2)] = 100.0f;

	    for (int t = 0; t < duration; t++) {
	        timestep(u, u1, u0);

	        /* Rotate the buffers: current becomes previous, next becomes current,
	         * and the old previous buffer is reused as scratch for the next step. */
	        float* tmp = u0;
	        u0 = u1;
	        u1 = u;
	        u = tmp;
	    }

	    free(u);
	    free(u1);
	    free(u0);
	}

	/* Benchmark driver: two untimed warm-up runs of splash(), then ten timed runs.
	 * Each timed run prints its duration, measured with clock(), in milliseconds. */
	int main(int argc, const char * argv[]) {
	    int run;

	    printf("Warmup...\n");
	    for (run = 0; run < 2; run++) {
	        splash();
	    }

	    printf("Benchmarks...\n");
	    run = 0;
	    while (run < 10) {
	        const clock_t begin = clock();
	        splash();
	        const clock_t ticks = clock() - begin;

	        long double elapsed_ms = (long double)ticks * 1000.0 / CLOCKS_PER_SEC;
	        printf("%Lf ms\n", elapsed_ms);

	        run++;
	    }

	    return 0;
	}