Skip to content

Instantly share code, notes, and snippets.

@c-spencer
Last active August 29, 2015 14:10
Show Gist options
  • Save c-spencer/3088318056c0ff379de9 to your computer and use it in GitHub Desktop.
Save c-spencer/3088318056c0ff379de9 to your computer and use it in GitHub Desktop.
Simple and incomplete MessageFormat compiler for Elixir
defmodule Plural do
require Record
Record.defrecordp :xmlAttribute, Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")
Record.defrecordp :xmlText, Record.extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl")
# Injected by `use Plural`. In the using module this:
#   * hooks Plural.__before_compile__/1 via @before_compile,
#   * registers @plurals as an accumulating attribute,
#   * defines a private plural/2 that delegates to plural/3 with :cardinal,
#   * imports Plural.
defmacro __using__(_env) do
quote do
@before_compile Plural
Module.register_attribute(__MODULE__, :plurals, accumulate: true)
defp plural(lang, n), do: plural(lang, n, :cardinal)
import Plural
end
end
# Expands to a write of `lang` to the caller's @plurals module attribute.
# __before_compile__/1 later reads that attribute to decide which languages
# to compile rules for.
defmacro plural(lang) do
quote do
@plurals unquote(lang)
end
end
# Runs just before the using module is compiled. Reads the languages collected
# in @plurals and returns one quoted `plural/3` definition per distinct
# language.
#
# Duplicates are dropped with Enum.uniq/1 (instead of a round trip through the
# deprecated HashSet), which also keeps the generated definitions in a
# deterministic order.
defmacro __before_compile__(env) do
  env.module
  |> Module.get_attribute(:plurals)
  |> Enum.uniq()
  |> Enum.map(&compile_lang/1)
end
# Builds the quoted plural/3 definition for a single language: loads its raw
# rules, parses each rule body, then passes the parsed rules to compile_rules/2.
defp compile_lang(lang) do
  parsed_rules =
    Enum.map(load(lang), fn {category, rule} ->
      {category, parse(rule)}
    end)

  compile_rules(parsed_rules, lang)
end
# Compiles a list of `{category, {condition_ast, deps}}` rules into a quoted
# `def plural(lang, n, :cardinal)`.
#
# The generated body first binds every operand the rules depend on (except
# `n`, which is the function argument) and then picks the category with a
# `cond` whose clauses appear in the same order as `rules`.
defp compile_rules(rules, lang) do
  # Folding from the right keeps the clauses in rule order while prepending.
  {cond_clauses, operands} =
    List.foldr(rules, {[], HashSet.new}, fn {category, {condition, rule_deps}}, {acc, seen} ->
      {[{:->, [], [[condition], category]} | acc], Set.union(seen, rule_deps)}
    end)

  bindings =
    operands
    |> Set.delete(:n)
    |> Enum.map(fn operand ->
      quote do
        unquote(var(operand)) = unquote(compile_dep(operand))
      end
    end)

  quote do
    def plural(unquote(lang), unquote(var(:n)), :cardinal) do
      unquote_splicing(bindings)
      cond do
        unquote(cond_clauses)
      end
    end
  end
end
# Builds the AST node for the variable `name`. Every variable generated by
# this module uses the same :prelude context.
defp var(name) do
  {name, [], :prelude}
end
# Shared structure for v/f/t: quoted code that yields the text after the
# first "." in `inspect(n)`, or "" when there is no "." (e.g. integers).
# The parentheses only make the existing precedence explicit: the pipeline
# is evaluated first and `|| ""` supplies the fallback.
defp after_decimal do
  quote do
    (unquote(var(:n)) |> inspect() |> String.split(".") |> Enum.at(1)) || ""
  end
end
# Compiles the operand values needed for pluralising. Each clause returns
# quoted code that is evaluated inside the generated plural/3, where `n` is
# bound. Operand meanings follow the Unicode plural rule syntax:
#
#   :i - integer part of n
#   :v - number of visible fraction digits, trailing zeros included
#   :w - number of visible fraction digits, trailing zeros removed
#   :f - visible fraction digits as an integer, trailing zeros included
#   :t - visible fraction digits as an integer, trailing zeros removed
#
# :f and :t always evaluate to an integer (0 when there is no fraction), so
# rule comparisons such as `f = 0` or `t % 10 = 1` work as intended.
defp compile_dep(:i) do
  quote do: trunc(unquote(var(:n)))
end

defp compile_dep(:v) do
  quote do: String.length(unquote(after_decimal()))
end

defp compile_dep(:w) do
  quote do: String.length(unquote(significant_decimal()))
end

defp compile_dep(:f), do: digits_to_integer(after_decimal())

defp compile_dep(:t), do: digits_to_integer(significant_decimal())

# Quoted fraction digits with trailing zeros removed. Only the right-hand
# side is trimmed: leading zeros (as in "05") are part of the fraction.
defp significant_decimal do
  quote do: String.replace(unquote(after_decimal()), ~r/0+$/, "")
end

# Quoted conversion of a digit string into an integer; an empty string
# (no fraction digits) becomes 0 instead of :error.
defp digits_to_integer(digits) do
  quote do
    case Integer.parse(unquote(digits)) do
      {value, _rest} -> value
      :error -> 0
    end
  end
end
# Load a language's plural rules from plurals.xml, located in the same
# directory as this source file. Returns a list of `{count, rule}` pairs, where
# `count` is the rule's @count attribute as a string and `rule` is the rule
# text with everything from the first "@" onwards removed.
defp load(lang) do
  path = __DIR__ <> "/plurals.xml"
  {:ok, contents} = :file.read_file(path)
  {doc, _rest} = :xmerl_scan.string(:binary.bin_to_list(contents))

  # Padding @locales with spaces makes the match hit whole locale names only.
  xpath = "//pluralRules[contains(concat(' ', @locales, ' '), ' #{lang} ')]/pluralRule"

  Enum.map(q(xpath, doc), fn rule_el ->
    [xmlAttribute(value: count)] = q("./@count", rule_el)
    [xmlText(value: rule)] = q("./text()", rule_el)
    {List.to_string(count), extract_rule(rule)}
  end)
end
# Runs the XPath expression `s` (a binary) against `xml` using xmerl and
# returns whatever :xmerl_xpath.string/2 returns.
defp q(s, xml) do
  xpath = to_char_list(s)
  :xmerl_xpath.string(xpath, xml)
end
# Takes the rule text as a charlist and returns, as a stripped binary, the
# part before the first "@".
defp extract_rule(rule) do
  text = List.to_string(rule)
  # `[^@]*` can match the empty string, so the regex always yields a result.
  [condition | _] = Regex.run(~r/^[^@]*/, text)
  String.strip(condition)
end
# Parse a rule string into {ast, deps}: `ast` is the quoted boolean condition
# and `deps` is the set of operand names the rule refers to. An empty rule is
# the catch-all and compiles to `true` with no dependencies.
defp parse(""), do: {true, HashSet.new}

defp parse(str) do
  {tokens, deps} = tokenise(str, [], HashSet.new)
  tree = parse_tree(tokens, [], [])
  {compile(tree), deps}
end
# Tokenise a rule string by matching on its prefix, one clause per token kind.
# Returns `{tokens, deps}` on success, where `deps` collects every operand
# variable seen, or `{:error, "Couldn't parse rule.", rest}` when the
# remaining input starts with something unrecognised.
defp tokenise("", tokens, deps), do: {Enum.reverse(tokens), deps}

defp tokenise("and" <> rest, tokens, deps), do: tokenise(rest, [{:op, :and} | tokens], deps)
defp tokenise("or" <> rest, tokens, deps), do: tokenise(rest, [{:op, :or} | tokens], deps)
defp tokenise(".." <> rest, tokens, deps), do: tokenise(rest, [{:op, :range} | tokens], deps)
defp tokenise("!=" <> rest, tokens, deps), do: tokenise(rest, [{:op, :neq} | tokens], deps)
defp tokenise("%" <> rest, tokens, deps), do: tokenise(rest, [{:op, :mod} | tokens], deps)
defp tokenise("=" <> rest, tokens, deps), do: tokenise(rest, [{:op, :eq} | tokens], deps)
defp tokenise("," <> rest, tokens, deps), do: tokenise(rest, [{:op, :comma} | tokens], deps)

# Spaces separate tokens but carry no meaning of their own.
defp tokenise(" " <> rest, tokens, deps), do: tokenise(rest, tokens, deps)

# Single-letter operand variables.
defp tokenise(<<c::binary-size(1), rest::binary>>, tokens, deps)
     when c in ["n", "i", "f", "t", "v", "w"] do
  operand = String.to_atom(c)
  tokenise(rest, [{:var, operand} | tokens], Set.put(deps, operand))
end

# Anything else must be an integer literal.
defp tokenise(str, tokens, deps) do
  case Regex.run(~r/^[0-9]+/, str) do
    nil ->
      {:error, "Couldn't parse rule.", str}

    [digits] ->
      count = String.length(digits)
      rest = String.slice(str, count, String.length(str) - count)
      {value, ""} = Integer.parse(digits)
      tokenise(rest, [{:number, value} | tokens], deps)
  end
end
# Parse tokens into a tree, using a shunting-yard parser.
# Arguments are (remaining tokens, operator stack, output stack). A higher
# number in @precedences binds tighter; operators of equal precedence are
# reduced left to right.
@precedences %{or: 1, and: 2, neq: 3, eq: 3, mod: 4, comma: 5, range: 6}

# Nothing left to read and nothing pending: the lone output is the tree.
defp parse_tree([], [], [tree]), do: tree

# Input exhausted: reduce the pending operators one at a time.
defp parse_tree([], [pending | ops], output), do: push_op(pending, [], ops, output)

# First operator seen: nothing to compare against, just stack it.
defp parse_tree([{:op, incoming} | tokens], [], output) do
  parse_tree(tokens, [incoming], output)
end

# Reduce the stacked operator first unless the incoming one binds tighter.
# When reducing, the incoming operator stays in the input to be re-examined.
defp parse_tree([{:op, incoming} | tokens] = input, [top | ops] = stack, output) do
  cond do
    @precedences[incoming] <= @precedences[top] -> push_op(top, input, ops, output)
    true -> parse_tree(tokens, [incoming | stack], output)
  end
end

# Operands go straight to the output stack.
defp parse_tree([operand | tokens], ops, output) do
  parse_tree(tokens, ops, [operand | output])
end

# A comma whose left side is already a list extends that list.
defp push_op(:comma, tokens, ops, [right, {:list, items} | output]) do
  parse_tree(tokens, ops, [{:list, [right | items]} | output])
end

# A comma between two plain operands starts a new list.
defp push_op(:comma, tokens, ops, [right, left | output]) do
  parse_tree(tokens, ops, [{:list, [right, left]} | output])
end

# Every other operator becomes a binary node over the top two outputs.
defp push_op(op, tokens, ops, [right, left | output]) do
  parse_tree(tokens, ops, [{:binary, op, left, right} | output])
end
# Compile out the tree into elixir forms.
#
# Takes a node produced by parse_tree/3 and returns quoted Elixir:
#   {:number, n}                  -> the integer itself
#   {:var, v}                     -> a reference to the operand variable
#   l = a,b,c                     -> (l == a) or (l == b) or ... (items may be ranges)
#   l = a..b                      -> l in a..b
#   l != <list> / l != a..b       -> negation of the corresponding `=` form
#   any other binary operator     -> the Kernel call named in @op_map
@op_map %{or: :or, and: :and, neq: :!=, eq: :==, mod: :rem}

defp compile({:number, n}), do: n
defp compile({:var, v}), do: var(v)

defp compile({:binary, :eq, l, {:list, vs}}) do
  vs
  |> Enum.map(&compile({:binary, :eq, l, &1}))
  |> Enum.reduce(fn alternative, acc ->
    quote(do: unquote(acc) or unquote(alternative))
  end)
end

defp compile({:binary, :eq, l, {:binary, :range, lr, rr}}) do
  quote do
    unquote(compile(l)) in unquote(compile(lr))..unquote(compile(rr))
  end
end

defp compile({:binary, :neq, l, {:list, vs}}) do
  quote do: !unquote(compile({:binary, :eq, l, {:list, vs}}))
end

# `!=` against a range (e.g. "n % 100 != 11..14") must negate the membership
# test; without this clause the range would reach the generic clause below,
# which has no operator to emit for :range.
defp compile({:binary, :neq, l, {:binary, :range, _lr, _rr} = range}) do
  quote do: !unquote(compile({:binary, :eq, l, range}))
end

defp compile({:binary, op, l, r}) do
  # Map.fetch!/2 fails loudly at compile time for an operator that has no
  # direct Kernel equivalent, instead of emitting an invalid AST node.
  {Map.fetch!(@op_map, op), [context: Elixir, import: Kernel], [compile(l), compile(r)]}
end
end
defmodule MessageFormat do
require Plural
# Injected by `use MessageFormat`: expands to `use Plural` in the using module.
defmacro __using__(_env) do
quote do
use Plural
end
end
# Defines a clause of `name/3` matching the given `lang` and `key`; the third
# parameter is bound to `args`, which the compiled message reads from.
# eg compile_string(:t!, "en", "test", "my test string")
#
# The message is parsed and compiled while the macro expands, so `string`
# has to be a literal. The expansion also registers `lang` with Plural and
# strips the rendered message.
defmacro compile_string(name, lang, key, string) do
  args_var = {:args, [], Elixir}

  body =
    string
    |> MessageFormat.parse()
    |> MessageFormat.compile(%{lang: lang})

  quote do
    Plural.plural(unquote(lang))

    def unquote(name)(unquote(lang), unquote(key), unquote(args_var)) do
      String.strip(unquote(body))
    end
  end
end
# Parse a message string to an ast: tokenise it, drop the empty text buffers
# the tokeniser leaves between adjacent special characters, and build the
# nested structure. Fails with a MatchError when tokenising reports an error.
def parse(str) do
  {:ok, tokens} = tokenise(str, {"", [], 0})
  meaningful = Enum.reject(tokens, &(&1 == ""))
  parse_tree(meaningful, [])
end
# Tokenise a message string.
#
# The state is `{buffer, tokens, depth}`: `buffer` is the literal text read
# since the last special character, `tokens` is the reversed token list and
# `depth` is the current brace nesting. Special characters flush the buffer
# and emit :open / :close / :comma / :hash; commas and hashes are only special
# inside braces. A backslash makes the following byte literal.
#
# Returns `{:ok, tokens}` or `{:error, message}` for unbalanced braces or a
# backslash with nothing after it.
defp tokenise("", {buffer, tokens, 0}) do
  {:ok, Enum.reverse([buffer | tokens])}
end

defp tokenise("", {_buffer, _tokens, _depth}) do
  {:error, "Unmatched opening bracket"}
end

defp tokenise("{" <> rest, {buffer, tokens, depth}) do
  tokenise(rest, {"", [:open, buffer | tokens], depth + 1})
end

defp tokenise("}" <> rest, {buffer, tokens, depth}) when depth > 0 do
  tokenise(rest, {"", [:close, buffer | tokens], depth - 1})
end

defp tokenise("}" <> _rest, {_buffer, _tokens, _depth}) do
  {:error, "Unmatched closing bracket"}
end

defp tokenise("," <> rest, {buffer, tokens, depth}) when depth > 0 do
  tokenise(rest, {"", [:comma, buffer | tokens], depth})
end

defp tokenise("#" <> rest, {buffer, tokens, depth}) when depth > 0 do
  tokenise(rest, {"", [:hash, buffer | tokens], depth})
end

# A lone backslash at the end of the input has nothing to escape; report it
# like the other malformed inputs rather than crashing on a failed match.
defp tokenise("\\", {_buffer, _tokens, _depth}) do
  {:error, "Trailing escape character"}
end

defp tokenise(<<"\\", escaped::binary-size(1), rest::binary>>, {buffer, tokens, depth}) do
  tokenise(rest, {buffer <> escaped, tokens, depth})
end

defp tokenise(<<c::binary-size(1), rest::binary>>, {buffer, tokens, depth}) do
  tokenise(rest, {buffer <> c, tokens, depth})
end
# Parse tokens out into an ast. `olist` accumulates the current level in
# reverse. At the top level the result is a list; inside braces the result is
# `{clause_tokens, remaining_tokens}` so the caller can continue after the
# matching :close.
defp parse_tree([:open | rest], olist) do
  {inner, remaining} = parse_tree(rest, [])
  parse_tree(remaining, [parse_clause(inner) | olist])
end

defp parse_tree([:close | rest], olist), do: {Enum.reverse(olist), rest}

defp parse_tree([token | rest], olist), do: parse_tree(rest, [token | olist])

defp parse_tree([], olist), do: Enum.reverse(olist)
# Takes the tokens of a bracketed clause. When the clause has the shape
# `argument , keyword ...`, the keyword is stripped and the clause is handed to
# formatter/1; any other clause is returned unchanged.
defp parse_clause(tokens) do
  case tokens do
    [argument, :comma, keyword | rest] ->
      formatter([argument, String.strip(keyword) | rest])

    _ ->
      tokens
  end
end
# Recognise select/plural formatters and turn them into
# `{:select | :plural, arg, branches}`; anything else passes through as-is.
defp formatter(tokens) do
  case tokens do
    [arg, "select", :comma | body] -> {:select, arg, extract(body)}
    [arg, "plural", :comma | body] -> {:plural, arg, extract(body)}
    _ -> tokens
  end
end
# Transform a list of tokens into a map
# [a b c d] -> %{"a"=>"b", "c"=>"d"}
defp extract(tokens) do
  extract_map(clean_tokens(tokens), %{})
end

# Consumes the tokens two at a time as key/value pairs; keys are stripped.
defp extract_map([], pairs), do: pairs

defp extract_map([key, value | rest], pairs) do
  extract_map(rest, Map.put(pairs, String.strip(key), value))
end

# Strips every string token and drops those that end up empty; all other
# tokens are kept untouched and the order is preserved.
defp clean_tokens(tokens) do
  Enum.flat_map(tokens, fn
    token when is_bitstring(token) ->
      case String.strip(token) do
        "" -> []
        stripped -> [stripped]
      end

    token ->
      [token]
  end)
end
# Compiles a parsed message (see parse/1) into quoted Elixir that evaluates to
# the rendered string. The generated code reads its values from a variable
# named `args`.
#
# `env` is a map with :lang (the language passed to plural/2) and, while
# compiling the branches of a plural, :accessor (quoted code for the number
# being pluralised, which `#` expands to).

# An empty token list (empty message, `{}` or an empty branch body) renders as
# the empty string; Enum.reduce/2 below cannot handle an empty list.
def compile([], _env), do: ""

# Recursively compile lists into a left-to-right `<>` concatenation.
def compile(tokens, env) when is_list(tokens) do
  tokens
  |> Enum.map(&compile(&1, env))
  |> Enum.reduce(fn right, left ->
    {:<>, [context: Elixir, import: Kernel], [left, right]}
  end)
end

# select: a `case` on args[key] with one clause per branch name.
def compile({:select, arg, m}, env) do
  key = arg_key(arg)

  clauses =
    Enum.map(m, fn {branch, body} ->
      {:->, [], [[branch], compile(body, env)]}
    end)

  quote do
    case unquote({:args, [], Elixir})[unquote(key)] do
      unquote(clauses)
    end
  end
end

# plural: a `case` on the plural category of args[key]; inside the branches
# `#` renders the number itself.
def compile({:plural, arg, m}, env) do
  key = arg_key(arg)

  accessor =
    quote do
      unquote({:args, [], Elixir})[unquote(key)]
    end

  branch_env = Map.put(env, :accessor, accessor)

  clauses =
    Enum.map(m, fn {category, body} ->
      {:->, [], [[category], compile(body, branch_env)]}
    end)

  quote do
    case plural(unquote(env.lang), unquote(accessor)) do
      unquote(clauses)
    end
  end
end

# `#` is the pluralised number inside a plural branch and a literal elsewhere.
def compile(:hash, env) do
  if Map.has_key?(env, :accessor) do
    quote do
      inspect(unquote(env.accessor))
    end
  else
    "#"
  end
end

def compile(:comma, _env), do: ","
def compile(s, _env) when is_bitstring(s), do: s

# Turns an argument name from the message source ("NUM_RESULTS") into the
# atom key looked up in `args` (:num_results). Surrounding whitespace is
# ignored so `{ NAME , select, ...}` resolves to the same key. The atom is
# created from message source text while the message is being compiled.
defp arg_key(arg) do
  arg |> String.strip() |> String.downcase() |> String.to_atom()
end
end
# Example module: defines t!/3 clauses for the "test_message" key in English
# and German by compiling MessageFormat strings at compile time.
defmodule Translate do
use MessageFormat
# English message: a select on GENDER followed by two plurals
# (NUM_CATEGORIES and NUM_RESULTS), where `#` stands for the number.
MessageFormat.compile_string :t!, "en", "test_message", """
{GENDER, select,
male {He}
female {She}
other {They}
} found {NUM_CATEGORIES, plural,
one {one category}
other {# categories}
} in {NUM_RESULTS, plural,
one {one result}
other {# results}
}.
"""
# German message with the same structure and argument names.
MessageFormat.compile_string :t!, "de", "test_message", """
{GENDER, select,
male {Er}
female {Sie}
other {Sie}
} fand {NUM_CATEGORIES, plural,
one {eine Kategorie}
other {# Kategorien}
} in {NUM_RESULTS, plural,
one {einem Ergebnis}
other {# Ergebnisse}
}.
"""
end
# Prints the rendered "test_message" for English and then German, using the
# same arguments for both.
sample_args = %{gender: "female", num_categories: 2, num_results: 1}

Enum.each(["en", "de"], fn lang ->
  IO.puts Translate.t!(lang, "test_message", sample_args)
end)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment