
Machine Learning in Elixir

Mix.install([
  {:axon, "~> 0.5"},
  {:nx, "~> 0.5"},
  {:exla, "~> 0.5"},
  {:explorer, "~> 0.5"},
  {:kino, "~> 0.8"},
  {:kino_explorer, "~> 0.1.21"},
  {:benchee, github: "bencheeorg/benchee", override: true},
  {:vega_lite, "~> 0.1.9"},
  {:kino_vega_lite, "~> 0.1.13"}
])

Prologue

require Explorer.DataFrame, as: DF

1. Make Machines That Learn

iris = Explorer.Datasets.iris()
normalized_iris =
  iris
  |> DF.mutate(
    for col <- across(~r/(sepal|petal)_(length|width)/) do
      # standardize each feature: subtract the mean, then divide by the standard
      # deviation (the parentheses around the subtraction matter for precedence)
      {col.name, (col - mean(col)) / standard_deviation(col)}
    end
  )
  |> DF.mutate(species: cast(species, :category))
for df <- [iris, normalized_iris] do
  Map.take(DF.dtypes(df), ["species"])
end
shuffled_normalized_iris = DF.shuffle(normalized_iris)
train_df = DF.slice(shuffled_normalized_iris, 0..119)
test_df = DF.slice(shuffled_normalized_iris, 120..149)
feature_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

from_frame = fn frame ->
  # stack the four feature columns into one {rows, 4} tensor
  x = Nx.stack(frame[feature_columns], axis: -1)
  # one-hot encode the categorical species (0, 1, 2) by comparing against iota [0, 1, 2]
  y = frame["species"] |> Nx.stack(axis: -1) |> Nx.equal(Nx.iota({1, 3}, axis: -1))
  {x, y}
end
train_data = from_frame.(train_df)
test_data = from_frame.(test_df)
model =
  Axon.input("iris_features", shape: {nil, 4})
  |> Axon.dense(3, activation: :softmax)
plot =
  VegaLite.new()
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "step", type: :quantitative)
  |> VegaLite.encode_field(:y, "loss", type: :quantitative)
  |> Kino.VegaLite.new()
  |> Kino.render()

trained_model_state =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, :sgd)
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.kino_vega_lite_plot(plot, "loss")
  |> Axon.Loop.run(Stream.repeatedly(fn -> train_data end), %{}, iterations: 500, epochs: 10)
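
With training done, the held-out rows can be scored the same way. A minimal evaluation sketch, assuming Axon's Axon.Loop.evaluator/1 and the test_data tuple built above:

model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run([test_data], trained_model_state)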

See the Table Protocol appendix below for an explanation of how a dataframe is converted to a tensor.

2. Get Comfortable with Nx

Nx.add(1, Nx.tensor([1, 2, 3]))
Nx.tensor([[1, 2, 3, 4]])
Nx.tensor([[1, 2, 3], [4, 5, 6]], names: [:x, :y])
a = Nx.tensor([[1, 2, 3], [4, 5, 6]])
Nx.to_binary(a)
<<1::64-signed-native, 2::64-signed-native, 3::64-signed-native>>
|> Nx.from_binary({:s, 64})
<<1::64-signed-native, 2::64-signed-native, 3::64-signed-native>>
|> Nx.from_binary({:s, 64})
|> Nx.reshape({1, 3}, names: [:x, :y])

Tensors are Immutable

...

a = Nx.tensor([1, 2, 3])

a
|> Nx.as_type({:f, 32})
|> Nx.reshape({1, 3, 1})
Nx.bitcast(a, {:f, 64})
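
Note that Nx.bitcast/2 reinterprets the raw s64 bits as f64 rather than converting the values, and none of these calls touched a itself. A quick check, not from the original notebook, that the binding still holds the original tensor:

# `a` was never rebound, so it keeps its original type and values
Nx.type(a)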

Element-wise Unary Operations

a = [-1, -2, -3, 0, 1, 2, 3]
Enum.map(a, &abs/1)
a = Nx.tensor([-1, -2, -3, 0, 1, 2, 3])
Nx.abs(a)
a = Nx.tensor([[[-1, -2, -3], [-4, -5, -6]], [[1, 2, 3], [4, 5, 6]]])
Nx.abs(a)

Element-wise Binary Operations

a = [1, 2, 3]
b = [4, 5, 6]
Enum.zip_with(a, b, fn x, y -> x + y end)
a = Nx.tensor([1, 2, 3])
b = Nx.tensor([4, 5, 6])
Nx.add(a, b)
Nx.add(5, Nx.tensor([1, 2, 3]))
Nx.add(Nx.tensor([1, 2, 3]), Nx.tensor([[4, 5, 6], [7, 8, 9]]))
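
The last call works because Nx broadcasts the {3} tensor across each row of the {2, 3} tensor. The same expansion can be written explicitly; a small sketch, not from the original notebook:

# broadcast the rank-1 tensor to {2, 3}, after which the shapes match exactly
Nx.tensor([1, 2, 3])
|> Nx.broadcast({2, 3})
|> Nx.add(Nx.tensor([[4, 5, 6], [7, 8, 9]]))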

Reductions

revenues = Nx.tensor([85, 76, 42, 34, 46, 23, 52, 99, 22, 32, 85, 51])
Nx.sum(revenues)
revenues =
  Nx.tensor(
    [
      [21, 64, 86, 26, 74, 81, 38, 79, 70, 48, 85, 33],
      [64, 82, 48, 39, 70, 71, 81, 53, 50, 67, 36, 50],
      [68, 74, 39, 78, 95, 62, 53, 21, 43, 59, 51, 88],
      [47, 74, 97, 51, 98, 47, 61, 36, 83, 55, 74, 43]
    ],
    names: [:year, :month]
  )
Nx.sum(revenues)
Nx.sum(revenues, axes: [:year])
Nx.sum(revenues, axes: [:month])
Nx.mean(revenues, axes: [:year])
# Nx.mean promotes integer input to the default float type (f32); cast explicitly for f64
|> Nx.as_type({:f, 64})

Going from def to defn

defmodule MyModule do
  import Nx.Defn

  defn adds_one(x) do
    # Nx.add(x, 1)
    (x + 1) |> print_value(label: "my value")
  end
end
MyModule.adds_one(Nx.tensor([1, 2, 3]))
defmodule Softmax do
  import Nx.Defn

  defn softmax(n) do
    Nx.exp(n) / Nx.sum(Nx.exp(n))
  end
end
key = Nx.Random.key(42)
{tensor, _key} = Nx.Random.uniform(key, shape: {1_000_000})

Benchee.run(
  %{
    "JIT with EXLA" => fn -> apply(EXLA.jit(&Softmax.softmax/1), [tensor]) end,
    "Regular Elixir" => fn ->
      Softmax.softmax(tensor)
    end
  },
  time: 10
)
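
The gap comes from compilation, not from the Softmax code itself. Instead of wrapping each call in EXLA.jit/1, EXLA can be made the default defn compiler; a sketch, assuming the EXLA dependency installed above:

# compile every defn invocation with EXLA from here on
Nx.Defn.global_default_options(compiler: EXLA)
Softmax.softmax(tensor)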

3. Harness the Power of Math

  • linear algebra
  • linear transformations
  • probability theory, decision theory and information theory
  • reasoning about uncertainty
  • frequentist
  • bayes
  • automatic differentiation
defmodule BerryFarm do
  import Nx.Defn

  defn profits(trees) do
    -((trees - 1) ** 4) + trees ** 3 + trees ** 2
  end

  defn profits_derivative(trees) do
    grad(trees, &profits/1)
  end
end

trees = Nx.linspace(0, 3, n: 100)
profits = BerryFarm.profits(trees)
profits_derivative = BerryFarm.profits_derivative(trees)

plot = fn ->
  import VegaLite

  new(title: "Berry Profits and Profits Rate of Change")
  |> data_from_values(
    trees: Nx.to_flat_list(trees),
    profits: Nx.to_flat_list(profits),
    profits_derivative: Nx.to_flat_list(profits_derivative)
  )
  |> layers([
    new()
    |> mark(:line, interpolate: :basis)
    |> encode_field(:x, "trees", type: :quantitative)
    |> encode_field(:y, "profits", type: :quantitative),
    new()
    |> mark(:line, interpolate: :basis)
    |> encode_field(:x, "trees", type: :quantitative)
    |> encode_field(:y, "profits_derivative", type: :quantitative)
    |> encode(:color, value: "#ff0000")
  ])
end

plot.()
defmodule GradFun do
  import Nx.Defn

  defn my_function(x) do
    import Nx

    sum(exp(cos(x)))
    |> print_expr()
  end

  defn grad_my_function(x) do
    grad(x, &my_function/1) |> print_expr()
  end
end

GradFun.grad_my_function(Nx.tensor([1.0, 2.0, 3.0]))
  • chain rule
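
The printed gradient expression is the chain rule applied element-wise: for $ f(x) = \sum_i e^{\cos(x_i)} $ the derivative of each term is

$ \frac{\partial f}{\partial x_i} = -\sin(x_i) \, e^{\cos(x_i)} $

which is exactly the expression grad builds symbolically before compilation.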

4. Optimize Everything

  • loss function
  • objective function
  • maximum likelihood estimation (MLE)
  • cross-entropy
  • mean squared error
  • convergence
  • regularize to generalize
  • overfitting, underfitting and capacity
  • complexity penalties
  • weight decay, L2 norm (distance) $ \sqrt{x^2 + y^2} $
  • regularized objective: $ \text{loss} + \lambda \cdot \text{penalty} $ (see the sketch after this list)
  • early stopping, validation set
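A minimal sketch of the L2 penalty from the list above, written as a standalone defn; the module name and lambda value are illustrative choices, not from the original notebook:

defmodule Regularized do
  import Nx.Defn

  # weight decay: add lambda * sum(params^2) to the data loss so large weights are penalized
  defn objective(params, x, y_true) do
    lambda = 1.0e-3
    y_pred = Nx.dot(x, params)
    mse = Nx.mean((y_true - y_pred) ** 2)
    mse + lambda * Nx.sum(params ** 2)
  end
end
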
key = Nx.Random.key(42)
{true_params, key} = Nx.Random.uniform(key, shape: {32})

true_function = fn params, x ->
  Nx.dot(x, params)
end

generate = fn true_function, key ->
  {x, key} = Nx.Random.uniform(key, shape: {10_000, 32})
  y = true_function.(true_params, x)
  data = Enum.zip(Nx.to_batched(x, 1), Nx.to_batched(y, 1))
  {data, key}
end

{train_data, key} = generate.(true_function, key)
{test_data, _key} = generate.(true_function, key)
# stream = Nx.to_batched(Nx.tensor([[1], [2]]), 1)
# Enum.each(stream, fn thing -> IO.inspect(thing, label: "thing") end)
defmodule SGD do
  import Nx.Defn

  defn init_random_params(key) do
    Nx.Random.uniform(key, shape: {32, 1})
  end

  defn model(params, x) do
    Nx.dot(x, params)
  end

  defn mean_squared_error(y_true, y_pred) do
    ((y_true - y_pred) ** 2)
    |> Nx.mean(axes: [-1])
  end

  defn loss(y_true, y_pred) do
    mean_squared_error(y_true, y_pred)
  end

  defn objective(params, x, y_true) do
    y_pred = model(params, x)
    loss(y_true, y_pred)
  end

  defn step(params, x, y_true) do
    {loss, grad} =
      value_and_grad(params, fn params ->
        objective(params, x, y_true)
      end)

    # learning rate
    alpha = 1.0e-2
    {loss, params - alpha * grad}
  end

  def eval(params, data) do
    data
    |> Enum.map(fn {x, y} ->
      pred = model(params, x)
      loss(y, pred)
    end)
    |> Enum.reduce(0, &Nx.add/2)
  end

  def train(data, iterations, key) do
    {params, _key} = init_random_params(key)
    loss = Nx.tensor(0.0)

    {_, trained_params} =
      for i <- 1..iterations, reduce: {loss, params} do
        {loss, params} ->
          for {{x, y}, j} <- Enum.with_index(data), reduce: {loss, params} do
            {loss, params} ->
              {batch_loss, new_params} = step(params, x, y)
              avg_loss = Nx.add(Nx.mean(batch_loss), loss) |> Nx.divide(j + 1)
              IO.write("\rEpoch: #{i}, Loss: #{Nx.to_number(avg_loss)}")
              {avg_loss, new_params}
          end
      end

    trained_params
  end
end
key = Nx.Random.key(100)
{random_params, _key} = SGD.init_random_params(key)
SGD.eval(random_params, test_data)
key = Nx.Random.key(0)
trained_params = SGD.train(train_data, 1, key)
SGD.eval(trained_params, test_data)
  • making it fail: train the same linear model on a nonlinear (cosine) target, where it cannot reach a low loss
key = Nx.Random.key(42)
true_function = fn params, x ->
  Nx.dot(x, params) |> Nx.cos()
end

{train_data, key} = generate.(true_function, key)
{test_data, _key} = generate.(true_function, key)

key = Nx.Random.key(0)
trained_params = SGD.train(train_data, 10, key)
SGD.eval(trained_params, test_data)
  • hyperparameter search
  • evolutionary algorithm
  • grid search (see the sketch below)
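
A minimal grid-search sketch over the number of training iterations, scoring each setting on the held-out test set. A fuller search would also vary the learning rate, which is hard-coded in SGD.step/3; the candidate values here are illustrative:

for iterations <- [1, 5, 10] do
  key = Nx.Random.key(0)
  params = SGD.train(train_data, iterations, key)
  {iterations, Nx.to_number(SGD.eval(params, test_data))}
end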

Appendix: Data Frames

DF.new(x: [1, 2, 3], y: [10, 20, 30], z: ["a", "b", "c"])
|> DF.mutate(x2: x + 1, x: count(x) + 1)

Appendix: Table Protocol

DF.new(sepal_length: [1, 2, 3], sepal_width: [4, 5, 6])
|> Table.to_rows()
|> Enum.to_list()
for %{"x" => x, "y" => y} <-
      Table.to_rows(
        DF.new(
          x: [1, 2, 3],
          y: [4, 5, 6]
        )
      ) do
  [x, y]
end
|> Nx.tensor()
DF.new(
  x: [1, 2, 3],
  y: [4, 5, 6]
)
|> Nx.stack(axis: -1)

Appendix: Nx Serving

defmodule MyDefn do
  import Nx.Defn

  defn print_and_multiply(x) do
    x = print_value(x, label: "debug")
    x * 2
  end
end
serving = Nx.Serving.new(fn opts -> Nx.Defn.jit(&MyDefn.print_and_multiply/1, opts) end)
batch = Nx.Batch.stack([Nx.tensor([1, 2, 3])])
Nx.Serving.run(serving, batch)
serving =
  Nx.Serving.jit(&MyDefn.print_and_multiply/1)
  |> Nx.Serving.client_preprocessing(fn input -> {Nx.Batch.stack(input), :client_info} end)
  |> Nx.Serving.client_postprocessing(fn {output, _metadata}, _client_info -> output end)

Nx.Serving.run(serving, [Nx.tensor([1, 2]), Nx.tensor([3, 4])])
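
Outside of a notebook, the same serving would normally run under a supervisor so concurrent callers share batches. A sketch using the child-spec form; the process name and batch size are illustrative:

children = [
  {Nx.Serving, serving: serving, name: MyServing, batch_size: 4}
]

Supervisor.start_link(children, strategy: :one_for_one)

# callers anywhere on the node submit work to the named serving;
# client_preprocessing stacks the list into a batch before execution
Nx.Serving.batched_run(MyServing, [Nx.tensor([1, 2]), Nx.tensor([3, 4])])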

Appendix: VegaLite

plot = fn ->
  import VegaLite

  new()
  |> data_from_url("https://vega.github.io/editor/data/weather.csv")
  |> transform(filter: "datum.location == 'Seattle'")
  |> concat([
    new()
    |> mark(:bar)
    |> encode_field(:x, "date", time_unit: :month, type: :ordinal)
    |> encode_field(:y, "precipitation", aggregate: :mean),
    new()
    |> mark(:point)
    |> encode_field(:x, "temp_min", bin: true)
    |> encode_field(:y, "temp_max", bin: true)
    |> encode(:size, aggregate: :count)
  ])
end

plot.()
alias VegaLite, as: Vl

chart =
  Vl.new(width: 400, height: 400)
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "x", type: :quantitative)
  |> Vl.encode_field(:y, "y", type: :quantitative)
  |> Kino.VegaLite.new()
  |> Kino.render()

for i <- 1..300 do
  point = %{x: i / 10, y: :math.sin(i / 10)}
  Kino.VegaLite.push(chart, point)
  Process.sleep(25)
end

:ok
Stream.zip(
  Nx.to_batched(Nx.tensor([1, 2, 3]), 1),
  Nx.to_batched(Nx.tensor([1, 2, 3]), 1)
)
|> Enum.take(2)
alias VegaLite, as: Vl

plot =
  Vl.new()
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "step", type: :quantitative)
  |> Vl.encode_field(:y, "loss", type: :quantitative)
  |> Kino.VegaLite.new()
  |> Kino.render()

# Sketch of a custom training loop: model_fn, objective_fn, optimizer and
# apply_updates are placeholders for the functions Axon builds for you.
# Axon.Loop.loop/1 takes the batch step function directly; the model itself
# is already baked into model_fn.
Axon.Loop.loop(fn {x, y}, state ->
  %{parameters: params, optimizer_state: optim_state} = state

  gradients = grad(params, &objective_fn.(&1, x, y))
  {updates, new_optim_state} = optimizer.(optim_state, params, gradients)

  new_params = apply_updates(params, updates)

  # Shown for simplicity; preds can be computed together with the gradients
  # by using value_and_grad instead
  preds = model_fn.(params, x)

  %{
    y_true: y,
    y_pred: preds,
    parameters: new_params,
    optimizer_state: new_optim_state
  }
end)
|> Axon.Loop.kino_vega_lite_plot(plot, "loss")
|> Axon.Loop.run(
  Stream.zip(
    1..100,
    Stream.map(1..100, &(&1 * 2))
  )
)