Created
February 22, 2024 16:04
-
-
Save hauleth/86d05c2a91d71b49b5fc939942ea426f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Langusta.Site.CleanURL do
  @moduledoc """
  Removes tracking data from URLs using provider rules in the ClearURLs JSON
  format (see `fetch_data/0` for the rule source).

  Typical flow: decode the rule file into a map, turn it into a list of
  `Entry` structs with `compile/1`, then pass URLs through `clean/2`.
  """

  defmodule Entry do
    @moduledoc """
    A single compiled provider ruleset.

    All patterns are compiled case-insensitively. `rules` are anchored so they
    must match a whole query parameter name; every other pattern list is
    matched against the full URL string.
    """

    defstruct [
      :url_pattern,
      rules: [],
      raw_rules: [],
      exceptions: [],
      redirections: []
    ]

    @type t :: %__MODULE__{
            url_pattern: Regex.t() | nil,
            rules: [Regex.t()],
            raw_rules: [Regex.t()],
            exceptions: [Regex.t()],
            redirections: [Regex.t()]
          }

    @doc """
    Builds an entry from one decoded provider map.

    Reads the string keys `"urlPattern"`, `"rules"`, `"rawRules"`,
    `"exceptions"` and `"redirections"`; missing lists default to `[]`.
    Raises if a pattern does not compile (including a missing `"urlPattern"`).
    """
    @spec from_json(map()) :: t()
    def from_json(map) do
      %__MODULE__{
        url_pattern: compile!(map["urlPattern"]),
        # Field rules name whole query parameters, so they are anchored:
        # the rule "ref" must not strip a parameter called "preference".
        rules: Enum.map(map["rules"] || [], &compile_anchored!/1),
        raw_rules: Enum.map(map["rawRules"] || [], &compile!/1),
        exceptions: Enum.map(map["exceptions"] || [], &compile!/1),
        redirections: Enum.map(map["redirections"] || [], &compile!/1)
      }
    end

    @doc """
    Returns `true` when the entry's URL pattern matches `url`.

    `url` may be a string or a `%URI{}`; it is converted with `to_string/1`.
    """
    @spec match?(t(), String.t() | URI.t()) :: boolean()
    def match?(%__MODULE__{url_pattern: re}, url) do
      Regex.match?(re, to_string(url))
    end

    @doc """
    Applies the entry to `url`.

    Returns:

      * `{:ok, uri}` with the URL untouched when an exception pattern matches;
      * `{:redirect, target}` when a redirection pattern matches, where
        `target` is the percent-decoded first capture group;
      * `{:ok, uri}` otherwise, after deleting every raw-rule match from the
        URL and dropping query parameters whose name matches a field rule.

    Raises if `url` (or the URL left after raw-rule removal) cannot be parsed
    by `URI.new!/1`.
    """
    @spec process(t(), String.t() | URI.t()) :: {:ok, URI.t()} | {:redirect, String.t()}
    def process(%__MODULE__{} = this, url) do
      str_url = to_string(url)
      %URI{} = uri = URI.new!(url)

      if matches?(this.exceptions, str_url) do
        {:ok, uri}
      else
        with {:ok, _} <- redirect(this.redirections, str_url) do
          stripped = strip_raw(this.raw_rules, str_url, uri)
          {:ok, %{stripped | query: clean_query(stripped.query, this.rules)}}
        end
      end
    end

    # Deletes every raw-rule match from the URL string and re-parses the result.
    # With no raw rules the already parsed URI is reused as is.
    defp strip_raw([], _str_url, uri), do: uri

    defp strip_raw(raw_rules, str_url, _uri) do
      raw_rules
      |> Enum.reduce(str_url, fn rule, acc -> Regex.replace(rule, acc, "") end)
      |> URI.new!()
    end

    # A URL without a query has nothing to filter.
    defp clean_query(nil, _rules), do: nil

    defp clean_query(query, rules) do
      # query_decoder/1 yields pairs in their original order and keeps repeated
      # keys; decoding into a map would reorder and deduplicate parameters.
      kept =
        query
        |> URI.query_decoder()
        |> Enum.reject(fn {name, _value} -> matches?(rules, name) end)

      case kept do
        # nil (rather than "") so the rendered URL has no dangling "?".
        [] -> nil
        pairs -> URI.encode_query(pairs)
      end
    end

    # Returns {:redirect, target} for the first pattern whose first capture
    # group is non-empty, otherwise {:ok, url}. Patterns without a capture
    # group, or whose group did not participate in the match, are skipped.
    defp redirect(patterns, url) do
      Enum.find_value(patterns, {:ok, url}, fn pattern ->
        case Regex.run(pattern, url) do
          [_, target | _] when target != "" -> {:redirect, URI.decode(target)}
          _ -> nil
        end
      end)
    end

    defp matches?(patterns, value) do
      Enum.any?(patterns, fn pattern -> Regex.match?(pattern, value) end)
    end

    defp compile!(binary), do: Regex.compile!(binary, [:caseless])

    # Wraps the rule in a non-capturing group so alternations stay inside the anchors.
    defp compile_anchored!(binary), do: compile!("^(?:" <> binary <> ")$")
  end

  @doc """
  Requests the minified rule file with `Req.get!/1` and returns that call's
  result unchanged.

  NOTE(review): presumably the decoded JSON body has to be extracted from the
  result before it is handed to `compile/1` - confirm against the caller.
  """
  @spec fetch_data() :: term()
  def fetch_data do
    Req.get!("https://rules1.clearurls.xyz/data.minify.json")
  end

  @doc """
  Compiles every provider under the `"providers"` key of the decoded rule
  file into an `Entry`. Provider names are ignored.
  """
  @spec compile(map()) :: [Entry.t()]
  def compile(json) do
    for {_name, data} <- json["providers"] do
      Entry.from_json(data)
    end
  end

  @doc """
  Runs `url` through every ruleset in `rules` whose URL pattern matches it.

  When a ruleset yields a redirect, cleaning restarts on the redirect target
  with the full rule list. The input is returned unchanged (same type as
  given) when no ruleset matched; otherwise the result is a `%URI{}`. Call
  `to_string/1` on the result for a uniform string.
  """
  @spec clean(String.t() | URI.t(), [Entry.t()]) :: String.t() | URI.t()
  def clean(url, rules), do: do_clean(url, rules, rules)

  defp do_clean(url, [], _), do: url

  defp do_clean(url, [ruleset | rest], all_rules) do
    if Entry.match?(ruleset, url) do
      case Entry.process(ruleset, url) do
        # The target may belong to a provider that was already passed over,
        # so start again from the complete rule list.
        {:redirect, new_url} -> do_clean(new_url, all_rules, all_rules)
        {:ok, cleaned} -> do_clean(cleaned, rest, all_rules)
      end
    else
      do_clean(url, rest, all_rules)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment