cigrainger · July 16, 2024 18:19
diff --git a/elixir_za.livmd b/elixir_za.livmd
 # Elixir ZA: Boost your data team's productivity with Explorer

 ```elixir
 Mix.install(
  [
    {:explorer, "~> 0.8.3"},
    {:kino, "~> 0.13.2"},
    {:kino_vega_lite, "~> 0.1.13"},
    {:kino_explorer, "~> 0.1.20"},
    {:adbc, "~> 0.6.0"},
    {:nx, "~> 0.7.3"},
    {:exla, "~> 0.7.3"},
    {:tucan, "~> 0.3.1"},
    {:recase, "~> 0.8.1"}
  ],
  config: [
    adbc: [drivers: [:sqlite]],
    nx: [
      default_backend: EXLA.Backend,
      default_defn_options: [compiler: EXLA, client: :host]
    ]
  ]
 )
 ```

 ## ADBC and San Francisco Restaurants

 ```elixir
 require Explorer.DataFrame, as: DataFrame
 require Explorer.Series, as: Series
 ```

 ```elixir
 {:ok, db} = Kino.start_child({Adbc.Database, driver: :sqlite, uri: "file:/Users/chris/Downloads/sfscores.sqlite"})
 {:ok, conn} = Kino.start_child({Adbc.Connection, database: db})
 ```

 ```elixir
 tables = DataFrame.from_query!(conn, "SELECT name FROM sqlite_master WHERE type='table';", [])
 ```

 ```elixir
 dataframes =
  for table_name <- Series.to_list(tables[:name]), into: %{} do
    {table_name,
     conn
     |> DataFrame.from_query!("select * from #{table_name}", [])
     |> DataFrame.rename_with(&Recase.to_snake/1)
    }
  end
 ```

 ```elixir
 businesses = dataframes["businesses"]
 ```

 ```elixir
 owners =
  businesses
  |> DataFrame.select(&String.starts_with?(&1, "owner_"))
  |> DataFrame.distinct()
  |> DataFrame.rename_with(fn
    "owner_name" = name -> name
    "owner_" <> name -> name
  end)
 ```

 ```elixir
 business_owners = DataFrame.select(businesses, ["business_id", "owner_name"])
 ```

 ```elixir
 largest_owners =
  business_owners
  |> DataFrame.group_by("owner_name")
  |> DataFrame.summarise(business_count: Series.size(business_id))
  |> DataFrame.sort_by(desc: business_count)
 ```

 ```elixir
 businesses = DataFrame.select(businesses, &(not String.starts_with?(&1, "owner_")))
 ```

 ```elixir
 inspections = dataframes["inspections"]
 ```

 ```elixir
 violations = dataframes["violations"]
 ```

 ```elixir
 inspections =
  DataFrame.mutate(inspections,
    business_id: cast(business_id, :integer),
    date: date |> strptime("%Y%m%d") |> cast(:date),
    type: cast(type, :category)
  )
 ```

 ```elixir
 violations =
  DataFrame.mutate(violations,
    business_id: cast(business_id, :integer),
    date: date |> strptime("%Y%m%d") |> cast(:date),
    risk_category: cast(risk_category, :category),
    violation_type_id: cast(violation_type_id, :category)
  )
 ```

 ```elixir
 Series.distinct(violations[:risk_category])
 ```

 ```elixir
 count_by_risk_category =
  violations
  |> DataFrame.group_by([:business_id, :risk_category])
  |> DataFrame.summarise(count: Series.size(business_id))
 ```

 ```elixir
 Tucan.bar(count_by_risk_category, "risk_category", "count", width: 400, height: 200)
 ```

 ```elixir
 weighted_scores =
  count_by_risk_category
  |> DataFrame.mutate(
    weighted_score:
      cond do
        risk_category == "High Risk" -> count * 3
        risk_category == "Moderate Risk" -> count * 2
        risk_category == "Low Risk" -> count * 1
      end
  )
  |> DataFrame.group_by(:business_id)
  |> DataFrame.summarise(weighted_score: sum(weighted_score), total_violations: sum(count))
 ```

 ```elixir
 weighted_scores
 |> DataFrame.join(businesses)
 |> DataFrame.join(business_owners, how: :left)
 |> DataFrame.join(largest_owners, how: :left)
 |> DataFrame.sort_by(desc: weighted_score)
 |> Tucan.bubble("weighted_score", "total_violations", "business_count", width: 600, height: 500, tooltip: :data)
 ```

 ```elixir
 x = Series.to_tensor(weighted_scores[:total_violations])
 y = Series.to_tensor(weighted_scores[:weighted_score])
 ```

 ```elixir
 defmodule Correlation do
  import Nx.Defn

  defn pearson(x, y) do
    mean_x = Nx.mean(x)
    mean_y = Nx.mean(y)

    diff_x = x - mean_x
    diff_y = y - mean_y

    numerator = Nx.sum(diff_x * diff_y)
    denominator = Nx.sqrt(Nx.sum(diff_x * diff_x) * Nx.sum(diff_y * diff_y))

    numerator / denominator
  end
 end
 ```

 ```elixir
 Correlation.pearson(weighted_scores[:total_violations], weighted_scores[:weighted_score])
 ```
	# Elixir ZA: Boost your data team's productivity with Explorer

	```elixir
	Mix.install(
	[
	{:explorer, "~> 0.8.3"},
	{:kino, "~> 0.13.2"},
	{:kino_vega_lite, "~> 0.1.13"},
	{:kino_explorer, "~> 0.1.20"},
	{:adbc, "~> 0.6.0"},
	{:nx, "~> 0.7.3"},
	{:exla, "~> 0.7.3"},
	{:tucan, "~> 0.3.1"},
	{:recase, "~> 0.8.1"}
	],
	config: [
	adbc: [drivers: [:sqlite]],
	nx: [
	default_backend: EXLA.Backend,
	default_defn_options: [compiler: EXLA, client: :host]
	]
	]
	)
	```

	## ADBC and San Francisco Restaurants

	```elixir
	require Explorer.DataFrame, as: DataFrame
	require Explorer.Series, as: Series
	```

	```elixir
	{:ok, db} = Kino.start_child({Adbc.Database, driver: :sqlite, uri: "file:/Users/chris/Downloads/sfscores.sqlite"})
	{:ok, conn} = Kino.start_child({Adbc.Connection, database: db})
	```

	```elixir
	tables = DataFrame.from_query!(conn, "SELECT name FROM sqlite_master WHERE type='table';", [])
	```

	```elixir
	dataframes =
	for table_name <- Series.to_list(tables[:name]), into: %{} do
	{table_name,
	conn
	\|> DataFrame.from_query!("select * from #{table_name}", [])
	\|> DataFrame.rename_with(&Recase.to_snake/1)
	}
	end
	```

	```elixir
	businesses = dataframes["businesses"]
	```

	```elixir
	owners =
	businesses
	\|> DataFrame.select(&String.starts_with?(&1, "owner_"))
	\|> DataFrame.distinct()
	\|> DataFrame.rename_with(fn
	"owner_name" = name -> name
	"owner_" <> name -> name
	end)
	```

	```elixir
	business_owners = DataFrame.select(businesses, ["business_id", "owner_name"])
	```

	```elixir
	largest_owners =
	business_owners
	\|> DataFrame.group_by("owner_name")
	\|> DataFrame.summarise(business_count: Series.size(business_id))
	\|> DataFrame.sort_by(desc: business_count)
	```

	```elixir
	businesses = DataFrame.select(businesses, &(not String.starts_with?(&1, "owner_")))
	```

	```elixir
	inspections = dataframes["inspections"]
	```

	```elixir
	violations = dataframes["violations"]
	```

	```elixir
	inspections =
	DataFrame.mutate(inspections,
	business_id: cast(business_id, :integer),
	date: date \|> strptime("%Y%m%d") \|> cast(:date),
	type: cast(type, :category)
	)
	```

	```elixir
	violations =
	DataFrame.mutate(violations,
	business_id: cast(business_id, :integer),
	date: date \|> strptime("%Y%m%d") \|> cast(:date),
	risk_category: cast(risk_category, :category),
	violation_type_id: cast(violation_type_id, :category)
	)
	```

	```elixir
	Series.distinct(violations[:risk_category])
	```

	```elixir
	count_by_risk_category =
	violations
	\|> DataFrame.group_by([:business_id, :risk_category])
	\|> DataFrame.summarise(count: Series.size(business_id))
	```

	```elixir
	Tucan.bar(count_by_risk_category, "risk_category", "count", width: 400, height: 200)
	```

	```elixir
	weighted_scores =
	count_by_risk_category
	\|> DataFrame.mutate(
	weighted_score:
	cond do
	risk_category == "High Risk" -> count * 3
	risk_category == "Moderate Risk" -> count * 2
	risk_category == "Low Risk" -> count * 1
	end
	)
	\|> DataFrame.group_by(:business_id)
	\|> DataFrame.summarise(weighted_score: sum(weighted_score), total_violations: sum(count))
	```

	```elixir
	weighted_scores
	\|> DataFrame.join(businesses)
	\|> DataFrame.join(business_owners, how: :left)
	\|> DataFrame.join(largest_owners, how: :left)
	\|> DataFrame.sort_by(desc: weighted_score)
	\|> Tucan.bubble("weighted_score", "total_violations", "business_count", width: 600, height: 500, tooltip: :data)
	```

	```elixir
	x = Series.to_tensor(weighted_scores[:total_violations])
	y = Series.to_tensor(weighted_scores[:weighted_score])
	```

	```elixir
	defmodule Correlation do
	import Nx.Defn

	defn pearson(x, y) do
	mean_x = Nx.mean(x)
	mean_y = Nx.mean(y)

	diff_x = x - mean_x
	diff_y = y - mean_y

	numerator = Nx.sum(diff_x * diff_y)
	denominator = Nx.sqrt(Nx.sum(diff_x * diff_x) * Nx.sum(diff_y * diff_y))

	numerator / denominator
	end
	end
	```

	```elixir
	Correlation.pearson(weighted_scores[:total_violations], weighted_scores[:weighted_score])
	```
No results found