Created
September 10, 2020 13:16
-
-
Save elvanja/9e63ff2306481555fd194c5631cc4f95 to your computer and use it in GitHub Desktop.
Build Elasticsearch mapping schema from given Elixir struct's typespec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule DataKiosk.Utils.BuildElasticsearchMapping do
  @moduledoc """
  **HERE BE DRAGONS**

  Builds Elasticsearch mapping for given module.

  Inspired by ["How to get typespec information from Elixir at runtime"](https://gist.github.com/JEG2/1685a9df2274ca5cf866122fa2dbc42d) gist.
  See also [dialyxir issue 411](https://github.com/jeremyjh/dialyxir/issues/411).

  It has been tested on several typespecs and suits this project's needs just fine.
  But, it may (and most likely will) not work on all typespecs out there, simply because
  it is tailored for specific usage. Consider yourself warned.

  The typespecs are read from the `:abstract_code` chunk of the module's `.beam` file,
  so the module must be compiled to disk with debug information.

  That being said, here is an example. Given these modules:

  ```
  defmodule Plant do
    @type type :: :fruit | :vegetable | nil

    @type t :: %__MODULE__{
            type: type(),
            name: String.t(),
            tags: list(atom())
          }

    defstruct [
      :type,
      :name,
      :tags
    ]
  end

  defmodule Basket do
    @type t :: %__MODULE__{
            plants: list(Plant.t()),
            weight: Decimal.t()
          }

    defstruct [
      :plants,
      :weight
    ]
  end
  ```

  The resulting Elasticsearch mapping would be:

  ```
  %{
    plants: %{
      properties: %{
        name: %{type: "text"},
        tags: %{type: "keyword"},
        type: %{type: "keyword"}
      },
      type: "nested"
    },
    weight: %{scaling_factor: 100, type: "scaled_float"}
  }
  ```

  Notes:

  - plants list is mapped as [nested](https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html) type
    - it does not require to be explicitly defined as list/array since that is automatic for Elasticsearch
  - `Plant.type` is converted to [keyword](https://www.elastic.co/guide/en/elasticsearch/reference/current/keyword.html#keyword-field-type)
    - same thing for `Plant.tags`, both have fixed values
  - and finally `Basket.plants` is also converted as nested `Plant`
  """

  require Logger

  @doc """
  Builds the Elasticsearch mapping for the `t` type of the given `module`.

  Returns a map with a single `:properties` key. When `t` is a struct/map type the value
  is a map of `field => mapping`; for any other kind of `t` the value is whatever the
  spec conversion yields (typically a `{:t, mapping}` tuple).

  Raises `MatchError` when the abstract code of `module` can't be read, e.g. when the
  module is not available as a `.beam` file or was compiled without debug information.
  """
  @spec given(module()) :: %{properties: term()}
  def given(module), do: %{properties: to_es(module, :t)}

  # Converts the `type` typespec declared in `module`.
  #
  # Returns a `field => mapping` map when the type is a map/struct with known fields,
  # otherwise the raw result of converting the type's spec (tuple, atom or nil).
  defp to_es(module, type) do
    {:ok, {^module, [{:abstract_code, {:raw_abstract_v1, attributes}}]}} =
      module
      |> :code.which()
      |> :beam_lib.chunks([:abstract_code])

    # Only `:type` attributes are collected, other attributes are skipped by the pattern.
    all_types =
      for {:attribute, _, :type, {name, type_spec, _}} <- attributes,
          into: %{},
          do: {name, type_spec}

    # Raw specs of the remaining types; they are resolved lazily when a field refers to
    # them, which also lets one user type refer to another.
    user_types = Map.delete(all_types, type)

    case Map.get(all_types, type) do
      # `map()` has `:any` instead of a field list, hence the guard.
      {:type, _, :map, fields} when is_list(fields) ->
        fields
        |> Enum.map(fn {:type, _, :map_field_exact, [{:atom, _, field}, spec]} ->
          as_field(field, to_es(field, spec, user_types))
        end)
        |> Enum.reject(&is_nil/1)
        |> Map.new()

      spec ->
        to_es(type, spec, user_types)
    end
  end

  # Converts a single spec in Erlang abstract format.
  #
  # Returns `{field, mapping}`, a bare atom for literal atom specs (collapsed to a
  # keyword mapping by unions and `as_field/2`) or `nil` when there is nothing to map.
  defp to_es(:__struct__, _, _), do: nil
  defp to_es(_, {:atom, _, value}, _), do: value

  # Elasticsearch fields can always hold multiple values, so lists map as their element.
  defp to_es(field, {:type, _, :list, [type]}, user_types),
    do: to_es(field, type, user_types)

  defp to_es(field, {:type, _, :union, types}, user_types) do
    specs =
      types
      |> Enum.map(&to_es(field, &1, user_types))
      |> Enum.reject(&is_nil/1)

    # Literal atoms come back as bare atoms (nil is already rejected above). If nothing
    # else is left the union is a fixed set of values, otherwise the first real mapping
    # wins and literal atoms are ignored.
    case Enum.reject(specs, &is_atom/1) do
      [] -> {field, %{type: "keyword"}}
      [first | _] -> first
    end
  end

  defp to_es(field, {:remote_type, _, [{:atom, _, String}, _, _]}, _),
    do: {field, %{type: "text"}}

  defp to_es(field, {:remote_type, _, [{:atom, _, Decimal}, _, _]}, _),
    do: {field, %{type: "scaled_float", scaling_factor: 100}}

  defp to_es(field, {:remote_type, _, [{:atom, _, module}, {:atom, _, type}, _]}, _) do
    case to_es(module, type) do
      properties when is_map(properties) ->
        {field, %{type: "nested", properties: properties}}

      # The remote type is not a struct/map (e.g. a union of atoms), use its mapping
      # directly instead of wrapping it as nested properties.
      other ->
        as_field(field, other)
    end
  end

  defp to_es(field, {:type, _, :integer, _}, _), do: {field, %{type: "long"}}
  defp to_es(field, {:type, _, :boolean, _}, _), do: {field, %{type: "boolean"}}
  defp to_es(field, {:type, _, :map, _}, _), do: {field, %{type: "object"}}
  defp to_es(field, {:type, _, :atom, _}, _), do: {field, %{type: "keyword"}}

  defp to_es(field, {:user_type, _, type, _} = spec, user_types) do
    case Map.fetch(user_types, type) do
      {:ok, type_spec} ->
        # The type being resolved is dropped so self-referencing types can't loop.
        as_field(field, to_es(field, type_spec, Map.delete(user_types, type)))

      :error ->
        unknown(field, spec)
    end
  end

  defp to_es(field, spec, _), do: unknown(field, spec)

  # Normalizes a conversion result so it can be used as the mapping of `field`.
  defp as_field(_field, nil), do: nil
  defp as_field(field, value) when is_atom(value), do: {field, %{type: "keyword"}}
  defp as_field(field, {_name, mapping}), do: {field, mapping}

  # Reports a spec this module doesn't know how to convert.
  defp unknown(field, spec) do
    Logger.error("could not parse spec for field #{field}, spec: #{inspect(spec)}")
    {field, :unknown}
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment