Created
January 31, 2025 15:23
-
-
Save lud/82b867ed0f78125d0bf145da31b26136 to your computer and use it in GitHub Desktop.
Oxford word scrambler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Tool do
  @moduledoc """
  Scrambles the inner letters of every word in a text.

  The first and last letter of each word stay where they are, and so do all
  separators (punctuation and whitespace). Only the letters in between are
  lightly shuffled, so the text remains mostly readable.

  Anything that is not listed in `@special_chars` counts as a word character.
  That includes digits and the hyphen, so `"2014"` and `"sous-sections"` are
  each handled as a single word.
  """

  # Code points that separate words. Runs of these are emitted verbatim.
  @special_chars ~c"\"'.,;:!?()[]{} \n\t\r/\\*_@$“”"

  @typedoc """
  A token produced by `tokenize/1`.

  A `:word` payload is a list whose elements are either a single code point or
  a two-element list holding a doubled letter (for example `[?l, ?l]`).
  """
  @type token :: {:whitespace, charlist()} | {:word, [char() | [char()]]}

  @doc """
  Splits `str` into alternating `:whitespace` and `:word` tokens.

  The scan always starts in `:whitespace` mode, so the result begins with a
  `:whitespace` token, which is empty when `str` starts with a word character.
  Two identical consecutive code points inside a word are grouped into one
  element so that they move together when the word is shuffled.

  Raises `FunctionClauseError` if `str` is not valid UTF-8.

  ## Examples

      iex> Tool.tokenize("ab, cd")
      [{:whitespace, []}, {:word, ~c"ab"}, {:whitespace, ~c", "}, {:word, ~c"cd"}]

  """
  @spec tokenize(String.t()) :: [token()]
  def tokenize(str) do
    tokenize(str, [], [], :whitespace)
  end

  @doc false
  # Separator while reading separators: keep accumulating.
  def tokenize(<<c::utf8, rest::binary>>, token_acc, acc, :whitespace)
      when c in @special_chars do
    tokenize(rest, [c | token_acc], acc, :whitespace)
  end

  # Separator while reading a word: close the word and re-scan the same input
  # in :whitespace mode.
  def tokenize(<<c::utf8, _::binary>> = str, token_acc, acc, :word)
      when c in @special_chars do
    acc = [flip(token_acc, :word) | acc]
    tokenize(str, [], acc, :whitespace)
  end

  # Word character while reading separators: close the separator run and
  # re-scan the same input in :word mode. The code point itself is not
  # consumed here; the pattern only asserts that one is present.
  def tokenize(<<_c::utf8, _::binary>> = str, token_acc, acc, :whitespace) do
    acc = [flip(token_acc, :whitespace) | acc]
    tokenize(str, [], acc, :word)
  end

  # Doubled letter inside a word: both occurrences of `c` must be equal for
  # this clause to match. They are stored as one unit.
  def tokenize(<<c::utf8, c::utf8, rest::binary>>, token_acc, acc, :word) do
    tokenize(rest, [[c, c] | token_acc], acc, :word)
  end

  # Any other word character.
  def tokenize(<<c::utf8, rest::binary>>, token_acc, acc, :word) do
    tokenize(rest, [c | token_acc], acc, :word)
  end

  # End of input: flush the token in progress and restore reading order.
  def tokenize(<<>>, token_acc, acc, kind) do
    acc = [flip(token_acc, kind) | acc]
    Enum.reverse(acc)
  end

  @doc """
  Builds a `{type, chars}` token from `chars` accumulated in reverse order.
  """
  def flip(chars, type) do
    {type, Enum.reverse(chars)}
  end

  @doc """
  Returns `str` with the inner letters of each word lightly shuffled.

  Words of up to three elements are returned unchanged. The result is random
  for longer words.

  ## Examples

      iex> Tool.scramble("the cat, ok!")
      "the cat, ok!"

  """
  @spec scramble(String.t()) :: String.t()
  def scramble(str) do
    str
    |> tokenize()
    |> Enum.map(fn
      {:whitespace, separators} -> separators
      {:word, word} -> mix_word(word)
    end)
    |> IO.chardata_to_string()
  end

  @doc """
  Scrambles `str` (see `scramble/1`) and prints the result to standard output,
  followed by a newline.
  """
  @spec run(String.t()) :: :ok
  def run(str) do
    str
    |> scramble()
    |> IO.puts()
  end

  # Shuffles everything between the first and the last element of a word.
  # Fewer than four elements leaves nothing meaningful to shuffle.
  defp mix_word([first, _, _, _ | _] = word) do
    [^first | rest] = word
    # A negative count splits from the end, so `middle` keeps its original
    # order. Reversing it here would make the "light" shuffle start from a
    # backwards word.
    {middle, [last]} = Enum.split(rest, -1)
    [first, light_shuffle(middle), last]
  end

  defp mix_word(short_word), do: short_word

  # Shuffle the list but keep an average order. An item at the beginning is very
  # unlikely to find itself at the end.
  #
  # Each item is sorted by its index plus a random integer offset in -2..1 plus
  # a random fraction from :rand.uniform/0, so items only trade places with
  # close neighbours.
  defp light_shuffle(list) do
    list
    |> Enum.with_index()
    |> Enum.sort_by(fn {_, index} -> Enum.random((index - 2)..(index + 1)) + :rand.uniform() end)
    |> Enum.map(fn {item, _} -> item end)
  end
end
# Demo: scramble a short French sample paragraph and print it to stdout.
input = """
On appelle code un ensemble de lois et textes réglementaires, normatifs ou
juridiques qui forment un système complet de législation dans une branche du
droit. Ils sont souvent placés dans un recueil sous une même reliure, organisé
en livres, titres, chapitres, sections, sous-sections, paragraphes et
articles. 23 févr. 2014
"""

Tool.run(input)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment