Simple and incomplete MessageFormat compiler for Elixir
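
The Plural module below compiles CLDR plural rules into plural(lang, n, :cardinal) clauses at build time; it expects the CLDR supplemental plurals.xml file to sit next to this source file (see load/1). MessageFormat then compiles ICU-style message strings ({arg, select, ...} and {arg, plural, ...}) into ordinary functions, and the Translate module at the bottom shows the intended usage. Note the code targets the Elixir of its day: HashSet, String.strip and to_char_list were still current in 2015.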
defmodule Plural do
  require Record

  Record.defrecordp :xmlAttribute, Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")
  Record.defrecordp :xmlText, Record.extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl")

  defmacro __using__(_env) do
    quote do
      @before_compile Plural
      Module.register_attribute(__MODULE__, :plurals, accumulate: true)
      defp plural(lang, n), do: plural(lang, n, :cardinal)
      import Plural
    end
  end

  defmacro plural(lang) do
    quote do
      @plurals unquote(lang)
    end
  end

  defmacro __before_compile__(env) do
    Module.get_attribute(env.module, :plurals)
    |> Enum.into(HashSet.new)
    |> Enum.map(&compile_lang(&1))
  end

  defp compile_lang(lang) do
    load(lang)
    |> Enum.map(fn ({k, v}) -> {k, parse(v)} end)
    |> compile_rules(lang)
  end

  # Compiles a list of rules into a def
  defp compile_rules(rules, lang) do
    { clauses, deps } = Enum.reduce(Enum.reverse(rules), { [], HashSet.new },
      fn({name, {ast, deps}}, { clauses, alldeps }) ->
        { [{:->, [], [[ast], name]}|clauses], Set.union(alldeps, deps) }
      end)

    compiled_deps = Set.delete(deps, :n)
                    |> Enum.map(&quote(do: unquote(var(&1)) = unquote(compile_dep(&1))))

    quote do
      def plural(unquote(lang), unquote(var(:n)), :cardinal) do
        unquote_splicing(compiled_deps)
        cond do
          unquote(clauses)
        end
      end
    end
  end
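
  # For "en", whose CLDR cardinal rules boil down to "i = 1 and v = 0" for
  # "one" and an empty rule for "other", the def generated above looks
  # roughly like:
  #
  #   def plural("en", n, :cardinal) do
  #     i = trunc(n)
  #     v = ...  # number of digits after the decimal point
  #     cond do
  #       i == 1 and v == 0 -> "one"
  #       true -> "other"
  #     end
  #   end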
  # Helper function to generate var references
  defp var(name), do: {name, [], :prelude}

  # Shared structure for v/f/t
  defp after_decimal do
    quote do: unquote(var(:n))
              |> inspect
              |> String.split(".")
              |> Enum.at(1) || ""
  end

  # Compiles the index numbers needed for pluralising
  defp compile_dep(:i) do
    quote do: trunc(unquote(var(:n)))
  end
  defp compile_dep(:v) do
    quote do: unquote(after_decimal) |> String.length
  end
  defp compile_dep(:f) do
    # Integer.parse returns {value, rest} (and :error on ""); the leading "0"
    # avoids the :error case without changing the parsed value
    quote do: Integer.parse("0" <> unquote(after_decimal)) |> elem(0)
  end
  defp compile_dep(:t) do
    quote do: unquote(after_decimal) |> String.strip(?0) |> String.length
  end

  # Load a lang's plurals from the XML
  defp load(lang) do
    {:ok, f} = :file.read_file(__DIR__ <> "/plurals.xml")
    {xml, _} = f
               |> :binary.bin_to_list
               |> :xmerl_scan.string

    qs = "//pluralRules[contains(concat(' ', @locales, ' '), ' #{lang} ')]/pluralRule"

    for el <- q(qs, xml) do
      [xmlAttribute(value: count)] = q("./@count", el)
      [xmlText(value: rule)] = q("./text()", el)
      { List.to_string(count), extract_rule(rule) }
    end
  end
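
  # Run an XPath query (given as a string) over xmerl-parsed XML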
  defp q(s, xml) do
    :xmerl_xpath.string(to_char_list(s), xml)
  end
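
  # CLDR rule text ends with "@integer ..."/"@decimal ..." sample lists;
  # keep only the condition before the "@"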
  defp extract_rule(rule) do
    Regex.run(~r/^[^@]*/, List.to_string(rule))
    |> List.first
    |> String.strip
  end

  # Parse a string into {ast, deps}
  defp parse("") do
    {true, HashSet.new}
  end
  defp parse(str) do
    {tokens, deps} = tokenise(str, [], HashSet.new)
    {parse_tree(tokens, [], []) |> compile, deps}
  end

  # Tokenise string, using simple recursive peeking
  defp tokenise(str, tokens, deps) do
    case str do
      "" -> {Enum.reverse(tokens), deps}
      <<"and", str::binary>> -> tokenise(str, [{:op, :and}|tokens], deps)
      <<"or", str::binary>> -> tokenise(str, [{:op, :or}|tokens], deps)
      <<"..", str::binary>> -> tokenise(str, [{:op, :range}|tokens], deps)
      <<"!=", str::binary>> -> tokenise(str, [{:op, :neq}|tokens], deps)
      <<"%", str::binary>> -> tokenise(str, [{:op, :mod}|tokens], deps)
      <<"=", str::binary>> -> tokenise(str, [{:op, :eq}|tokens], deps)
      <<",", str::binary>> -> tokenise(str, [{:op, :comma}|tokens], deps)
      <<" ", str::binary>> -> tokenise(str, tokens, deps)
      <<c::binary-size(1), str::binary>> when c == "n" or c == "i" or c == "f"
                                           or c == "t" or c == "v" or c == "w" ->
        atom = String.to_atom(c)
        tokenise(str, [{:var, atom}|tokens], Set.put(deps, atom))
      str ->
        case Regex.run(~r/^[0-9]+/, str) do
          [n] ->
            len = String.length(n)
            str = String.slice(str, len, String.length(str) - len)
            {i, ""} = Integer.parse(n)
            tokenise(str, [{:number, i}|tokens], deps)
          nil -> {:error, "Couldn't parse rule.", str}
        end
    end
  end

  # Parse tokens into a tree, using a shunting-yard parser
  @precedences %{ or: 1,
                  and: 2,
                  neq: 3, eq: 3,
                  mod: 4,
                  comma: 5,
                  range: 6 }

  defp parse_tree([], [], [output]) do
    output
  end
  defp parse_tree([], [op|opstack], output) do
    push_op(op, [], opstack, output)
  end
  defp parse_tree([{:op, o1}|rest], [], output) do
    parse_tree(rest, [o1], output)
  end
  defp parse_tree([{:op, o1}|rest], [o2|opstack], output) do
    if @precedences[o1] <= @precedences[o2] do
      push_op(o2, [{:op, o1}|rest], opstack, output)
    else
      parse_tree(rest, [o1, o2|opstack], output)
    end
  end
  defp parse_tree([node|rest], opstack, output) do
    parse_tree(rest, opstack, [node|output])
  end
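
  # Pop an operator from the stack onto the output: "," accumulates a
  # {:list, ...} of values, any other operator becomes a {:binary, op, l, r} node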
  defp push_op(:comma, tokens, opstack, [r, {:list, vs}|output]) do
    parse_tree(tokens, opstack, [{:list, [r|vs]}|output])
  end
  defp push_op(:comma, tokens, opstack, [r, l|output]) do
    parse_tree(tokens, opstack, [{:list, [r, l]}|output])
  end
  defp push_op(op, tokens, opstack, [r, l|output]) do
    parse_tree(tokens, opstack, [{:binary, op, l, r}|output])
  end

  # Compile out the tree into elixir forms
  @op_map %{ or: :or,
             and: :and,
             neq: :!=, eq: :==,
             mod: :rem }

  defp compile({:number, n}), do: n
  defp compile({:var, v}), do: var(v)
  defp compile({:binary, :eq, l, {:list, vs}}) do
    Enum.map(vs, &compile({:binary, :eq, l, &1}))
    |> Enum.reduce(&quote(do: unquote(&2) or unquote(&1)))
  end
  defp compile({:binary, :eq, l, {:binary, :range, lr, rr}}) do
    quote do
      unquote(compile(l)) in unquote(compile(lr))..unquote(compile(rr))
    end
  end
  defp compile({:binary, :neq, l, {:list, vs}}) do
    quote do: !unquote(compile({:binary, :eq, l, {:list, vs}}))
  end
  defp compile({:binary, op, l, r}) do
    {@op_map[op], [context: Elixir, import: Kernel], [compile(l), compile(r)]}
  end
end

defmodule MessageFormat do
  require Plural

  defmacro __using__(_env) do
    quote do
      use Plural
    end
  end

  # defines a name(lang, key, options) function
  # e.g. compile_string(:t!, "en", "test", "my test string")
  defmacro compile_string(name, lang, key, string) do
    compiled = MessageFormat.compile(MessageFormat.parse(string), %{lang: lang})
    quote do
      Plural.plural unquote(lang)
      def unquote(name)(unquote(lang), unquote(key), unquote({:args, [], Elixir})) do
        String.strip(unquote(compiled))
      end
    end
  end

  # Parse a string to an ast
  def parse(str) do
    {:ok, tokens} = tokenise(str, { "", [], 0 })
    tokens
    |> Enum.filter(fn (t) -> t != "" end)
    |> parse_tree([])
  end
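
  # As a rough illustration, parse("{N, plural, one {# item} other {# items}}")
  # returns something like:
  #   [{:plural, "N", %{"one" => [:hash, " item"], "other" => [:hash, " items"]}}]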
  # Tokenise a string
  defp tokenise("", { buffer, tokens, 0 }) do
    {:ok, Enum.reverse([buffer|tokens])}
  end
  defp tokenise("", { _buffer, _tokens, _ }) do
    {:error, "Unmatched opening bracket"}
  end
  defp tokenise(str, { buffer, tokens, b_depth }) do
    <<c::binary-size(1), rest::binary>> = str
    case { b_depth, c } do
      {_, "{"} ->
        tokenise(rest, { "", [:open, buffer | tokens], b_depth + 1 })
      {n, "}"} when n > 0 ->
        tokenise(rest, { "", [:close, buffer | tokens], b_depth - 1 })
      {_, "}"} -> {:error, "Unmatched closing bracket"}
      {n, ","} when n > 0 ->
        tokenise(rest, { "", [:comma, buffer | tokens], b_depth })
      {n, "#"} when n > 0 ->
        tokenise(rest, { "", [:hash, buffer | tokens], b_depth })
      {_, "\\"} ->
        <<c::binary-size(1), rest::binary>> = rest
        tokenise(rest, { buffer <> c, tokens, b_depth })
      {_, c} ->
        tokenise(rest, { buffer <> c, tokens, b_depth })
    end
  end

  # Parse tokens out into an ast
  defp parse_tree(tokens, olist) do
    case tokens do
      [:open | rest] ->
        { clause, rest } = parse_tree(rest, [])
        clause = parse_clause(clause)
        parse_tree(rest, [clause|olist])
      [:close | rest] ->
        { Enum.reverse(olist), rest }
      [x | rest] ->
        parse_tree(rest, [x|olist])
      [] ->
        Enum.reverse(olist)
    end
  end

  # Takes a bracketed clause and returns either the token list untouched
  # or a tuple describing a select/plural operation
  defp parse_clause([op1, :comma, op2 | rest]) do
    command = String.strip(op2)
    formatter([op1, command | rest])
  end
  defp parse_clause(tokens), do: tokens

  # recognise select/plural formatters
  defp formatter([arg, "select", :comma|body]) do
    {:select, arg, extract(body)}
  end
  defp formatter([arg, "plural", :comma|body]) do
    {:plural, arg, extract(body)}
  end
  defp formatter(tokens), do: tokens

  # Transform a list of tokens into a map
  # [a b c d] -> %{"a"=>"b", "c"=>"d"}
  defp extract(tokens) do
    tokens
    |> clean_tokens
    |> extract_map(%{})
  end
  defp extract_map([key, value|rest], m) do
    extract_map(rest, Map.put(m, String.strip(key), value))
  end
  defp extract_map([], m), do: m

  defp clean_tokens(tokens) do
    Enum.reduce(tokens, [], fn (r, acc) ->
      if is_bitstring(r) do
        case String.strip(r) do
          "" -> acc
          str -> [str|acc]
        end
      else
        [r|acc]
      end
    end)
    |> Enum.reverse
  end

  # Recursively compile lists
  def compile(tokens, env) when is_list(tokens) do
    tokens
    |> Enum.map(fn (t) -> compile(t, env) end)
    |> Enum.reduce(fn (right, left) ->
      {:<>, [context: Elixir, import: Kernel], [left, right]}
    end)
  end
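
  # select: match the raw argument value against the clause keys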
  def compile({:select, arg, m}, env) do
    arg = arg |> String.downcase |> String.to_atom
    clauses = Enum.map(m, fn(x) ->
      { k, v } = x
      {:->, [], [[k], compile(v, env)]}
    end)
    quote do
      case unquote({:args, [], Elixir})[unquote(arg)] do
        unquote(clauses)
      end
    end
  end
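
  # plural: run the argument through plural/2 and match the resulting CLDR
  # category ("one", "other", ...) against the clause keys; the accessor is
  # stashed in env so "#" can print the value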
  def compile({:plural, arg, m}, env) do
    arg = arg |> String.downcase |> String.to_atom
    accessor = quote do
      unquote({:args, [], Elixir})[unquote(arg)]
    end
    clauses = Enum.map(m, fn({ k, v }) ->
      {:->, [], [[k], compile(v, Map.put(env, :accessor, accessor))]}
    end)
    quote do
      case plural(unquote(env.lang), unquote(accessor)) do
        unquote(clauses)
      end
    end
  end
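
  # "#" inside a plural clause is replaced with the pluralised argument's
  # value; anywhere else it is left as a literal "#"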
  def compile(:hash, env) do
    if Map.has_key?(env, :accessor) do
      quote do
        inspect(unquote(env.accessor))
      end
    else
      "#"
    end
  end
  def compile(:comma, _env), do: ","
  def compile(s, _env) when is_bitstring(s), do: s
end

defmodule Translate do
  use MessageFormat

  MessageFormat.compile_string :t!, "en", "test_message", """
  {GENDER, select,
    male {He}
    female {She}
    other {They}
  } found {NUM_CATEGORIES, plural,
    one {one category}
    other {# categories}
  } in {NUM_RESULTS, plural,
    one {one result}
    other {# results}
  }.
  """

  MessageFormat.compile_string :t!, "de", "test_message", """
  {GENDER, select,
    male {Er}
    female {Sie}
    other {Sie}
  } fand {NUM_CATEGORIES, plural,
    one {eine Kategorie}
    other {# Kategorien}
  } in {NUM_RESULTS, plural,
    one {einem Ergebnis}
    other {# Ergebnisse}
  }.
  """
end

IO.puts Translate.t!("en", "test_message", %{gender: "female", num_categories: 2, num_results: 1})
IO.puts Translate.t!("de", "test_message", %{gender: "female", num_categories: 2, num_results: 1})
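
Assuming a CLDR plurals.xml is available, the two calls above should print something like "She found 2 categories in one result." and "Sie fand 2 Kategorien in einem Ergebnis.".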