Skip to content

Instantly share code, notes, and snippets.

@lud
Created January 31, 2025 15:23
Show Gist options
  • Save lud/82b867ed0f78125d0bf145da31b26136 to your computer and use it in GitHub Desktop.
Save lud/82b867ed0f78125d0bf145da31b26136 to your computer and use it in GitHub Desktop.
Oxford word scrambler
defmodule Tool do
  @moduledoc """
  Scrambles the inner letters of every word in a text while keeping each
  word's first and last letters, and every separator character, in place.

  The text is split into alternating `:whitespace` and `:word` tokens by
  `tokenize/1`. Word tokens are lightly shuffled, separator tokens are kept
  verbatim, and the pieces are glued back together in their original order.
  """

  # Code points treated as separators between words. Any other code point
  # (letters, but also digits and hyphens) is considered part of a word.
  @special_chars ~c"\"'.,;:!?()[]{} \n\t\r/\\*_@$“”"

  @typedoc """
  A token produced by `tokenize/1`.

  The payload is a list of code points. In `:word` tokens, two identical
  consecutive code points are grouped into a nested two-element list.
  """
  @type token :: {:whitespace | :word, [char() | [char()]]}

  @doc """
  Splits `str` into a list of `{:whitespace, chars}` and `{:word, chars}` tokens.

  Tokenizing starts in the `:whitespace` state, so the first token is always a
  `:whitespace` token, possibly with an empty payload. For example `"ab, cd"`
  yields a `:whitespace` token with `[]`, a `:word` token for `ab`, a
  `:whitespace` token for `", "` and a `:word` token for `cd`.

  Doubled letters inside a word are grouped as one unit (`"foo"` gives the
  word payload `[?f, [?o, ?o]]`) so that shuffling never separates them.

  Raises `FunctionClauseError` if `str` is not valid UTF-8.
  """
  @spec tokenize(String.t()) :: [token()]
  def tokenize(str) do
    tokenize(str, [], [], :whitespace)
  end

  @doc false
  # tokenize/4 is the state machine behind tokenize/1:
  #   * `token_acc` - units of the token being built, most recent first
  #   * `acc`       - finished tokens, most recent first
  #   * last arg    - kind of the token being built (:whitespace | :word)

  # Separator while reading separators: keep accumulating.
  def tokenize(<<c::utf8, rest::binary>>, token_acc, acc, :whitespace)
      when c in @special_chars do
    tokenize(rest, [c | token_acc], acc, :whitespace)
  end

  # Separator while reading a word: close the word and re-read the same input
  # in the :whitespace state (nothing is consumed here).
  def tokenize(<<c::utf8, _::binary>> = str, token_acc, acc, :word) when c in @special_chars do
    acc = [flip(token_acc, :word) | acc]
    tokenize(str, [], acc, :whitespace)
  end

  # Non-separator while reading separators: close the separator run and
  # re-read the same input in the :word state (nothing is consumed here).
  def tokenize(<<_c::utf8, _::binary>> = str, token_acc, acc, :whitespace) do
    acc = [flip(token_acc, :whitespace) | acc]
    tokenize(str, [], acc, :word)
  end

  # Two identical code points in a row inside a word: keep them together as a
  # single nested unit so the shuffle moves them as a pair.
  def tokenize(<<c::utf8, c::utf8, rest::binary>>, token_acc, acc, :word) do
    tokenize(rest, [[c, c] | token_acc], acc, :word)
  end

  # Any other word character.
  def tokenize(<<c::utf8, rest::binary>>, token_acc, acc, :word) do
    tokenize(rest, [c | token_acc], acc, :word)
  end

  # End of input: close the token in progress and restore the original order.
  def tokenize(<<>>, token_acc, acc, kind) do
    acc = [flip(token_acc, kind) | acc]
    Enum.reverse(acc)
  end

  @doc """
  Builds a `{type, chars}` token from `chars` accumulated in reverse order.
  """
  @spec flip(list(), :whitespace | :word) :: token()
  def flip(chars, type) do
    {type, Enum.reverse(chars)}
  end

  @doc """
  Returns `str` with the inner letters of each word lightly shuffled.

  Separator characters are left untouched, and so are words made of three
  units or fewer (a doubled letter counts as one unit). The shuffle is random,
  so repeated calls may return different strings.
  """
  @spec scramble(String.t()) :: String.t()
  def scramble(str) do
    str
    |> tokenize()
    |> Enum.map(fn
      {:whitespace, ws} -> ws
      {:word, word} -> mix_word(word)
    end)
    # The tokens hold code points (possibly nested), i.e. chardata, not bytes.
    |> IO.chardata_to_string()
  end

  @doc """
  Scrambles `str` with `scramble/1` and prints the result to standard output,
  followed by a newline. Returns `:ok`.
  """
  @spec run(String.t()) :: :ok
  def run(str) do
    str
    |> scramble()
    |> IO.puts()
  end

  # Words with at least four units: keep the first and last unit in place and
  # lightly shuffle what is in between. The middle must be handed to
  # light_shuffle/1 in its ORIGINAL order; passing it reversed would make the
  # "light" shuffle produce a roughly backwards word.
  defp mix_word([first | [_, _, _ | _] = rest]) do
    {middle, [last]} = Enum.split(rest, -1)
    [first, light_shuffle(middle), last]
  end

  # Three units or fewer: at most one inner unit, so there is nothing to shuffle.
  defp mix_word(short_word), do: short_word

  # Shuffle the list but keep an average order. An item at the beginning is very
  # unlikely to find itself at the end: each sort key lies in
  # [index - 2, index + 2), so items four or more positions apart never swap.
  defp light_shuffle(list) do
    list
    |> Enum.with_index()
    |> Enum.sort_by(fn {_, index} -> Enum.random((index - 2)..(index + 1)) + :rand.uniform() end)
    |> Enum.map(fn {item, _} -> item end)
  end
end
# Demo: scramble a sample French paragraph and print the result.
sample_text = """
On appelle code un ensemble de lois et textes réglementaires, normatifs ou
juridiques qui forment un système complet de législation dans une branche du
droit. Ils sont souvent placés dans un recueil sous une même reliure, organisé
en livres, titres, chapitres, sections, sous-sections, paragraphes et
articles. 23 févr. 2014
"""

Tool.run(sample_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment