Created
October 18, 2024 16:42
-
-
Save almirsarajcic/2f54da6900d7561afc7a74ca0ac62381 to your computer and use it in GitHub Desktop.
Checking links in the Elixir package documentation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Only warnings and errors are logged while the script runs.
Logger.configure(level: :warning)

# Dependencies are installed when the script starts: Req performs the HTTP
# requests and Earmark parses the Markdown.
Mix.install(req: "~> 0.3.0", earmark: "~> 1.4")
defmodule DocLinkChecker do
  @moduledoc """
  Checks the links found in the documentation extras of the Mix project located
  in the current working directory.

  The extras are read from the `:docs` entry returned by the `project/0`
  function of the module defined in `mix.exs`. Every extra is scanned line by
  line for Markdown links. Each link is then treated as one of:

    * a doc reference (`t:Mod.type/0`, `c:Mod.callback/1`), which is converted
      to a hexdocs.pm URL and checked as a remote link;
    * a remote URL (scheme and host present), which is fetched with `Req`;
    * an absolute path (leading `/`) or a relative path, which is looked up on
      disk.
  """

  @typedoc "A failed link: `{url, source_file, line, column, reason}`."
  @type invalid ::
          {:invalid, {String.t(), String.t(), pos_integer(), pos_integer(), String.t()}}

  @typedoc "A redirected link: `{url, location, source_file, line, column}`."
  @type redirected ::
          {:redirected, {String.t(), String.t(), String.t(), pos_integer(), pos_integer()}}

  @doc """
  Checks every link in the project's extras and returns the problematic ones.

  Valid links are dropped from the result, so an empty list means that all
  links passed. Raises if the current directory has no readable `mix.exs` or if
  one of the configured extras cannot be read.
  """
  @spec check_links() :: [invalid() | redirected()]
  def check_links do
    project_root = File.cwd!()
    project = project_root |> load_mix_project() |> project_config()
    extras = parse_extras(project)
    module_to_package_map = create_module_to_package_map(project)

    extras
    |> Enum.flat_map(&extract_links(&1, project_root))
    |> Enum.uniq()
    |> Enum.map(&check_link(&1, project_root, extras, module_to_package_map))
    |> Enum.reject(&match?({:valid, _}, &1))
  end

  # Evaluates mix.exs and returns the value of its last expression (for a
  # regular mix.exs that is the `{:module, ...}` tuple of the project module).
  # Evaluation executes the file, so only run this inside a trusted project.
  defp load_mix_project(project_root) do
    mix_exs_path = Path.join(project_root, "mix.exs")
    {result, _bindings} = Code.eval_file(mix_exs_path)
    result
  end

  # Returns the keyword list produced by the project module's `project/0`, or
  # `[]` when mix.exs did not evaluate to a module exporting that function.
  defp project_config({:module, module, _binary, _result}) do
    if function_exported?(module, :project, 0), do: module.project(), else: []
  end

  defp project_config(_unexpected) do
    IO.puts("Warning: Unexpected result from evaluating mix.exs")
    []
  end

  # Returns the paths of the configured extras as strings.
  defp parse_extras(project) do
    docs =
      case Keyword.get(project, :docs, []) do
        # ExDoc also accepts a zero-arity function that returns the options.
        docs_fun when is_function(docs_fun, 0) -> docs_fun.()
        docs -> docs
      end

    docs
    |> Keyword.get(:extras, [])
    |> Enum.map(fn
      # Written as `"README.md": [title: "..."]` the path is an atom.
      {path, _opts} -> to_string(path)
      path when is_binary(path) -> path
    end)
  end

  # Reads one extra and returns `{link, file, line_number, column}` tuples.
  defp extract_links(file, project_root) do
    project_root
    |> Path.join(file)
    |> File.read!()
    |> extract_markdown_links_with_line_numbers()
    |> Enum.map(fn {link, line_number, column} -> {link, file, line_number, column} end)
  end

  # Parses every line on its own so that each link can be attributed to a line
  # number. Links that only resolve across lines (for example reference-style
  # definitions) are therefore not found.
  defp extract_markdown_links_with_line_numbers(markdown) do
    markdown
    |> String.split("\n")
    |> Enum.with_index(1)
    |> Enum.flat_map(fn {line, line_number} ->
      case Earmark.as_ast(line, warnings: false, messages: false) do
        {:ok, ast, _messages} ->
          ast
          |> do_extract_links(line)
          |> Enum.map(fn {link, column} -> {link, line_number, column} end)

        _error ->
          []
      end
    end)
  end

  # Walks the Earmark AST and collects `{url, column}` for every anchor whose
  # first attribute is `href`.
  defp do_extract_links(ast, line) do
    Enum.flat_map(ast, fn
      {"a", [{"href", url} | _], _, _} -> link_position(url, line)
      {_, _, children, _} when is_list(children) -> do_extract_links(children, line)
      _ -> []
    end)
  end

  # `:binary.match/2` raises on an empty pattern, so empty hrefs are skipped.
  defp link_position("", _line), do: []

  # The column is the 1-based byte offset of the first occurrence of the URL in
  # the line. URLs that do not appear verbatim in the line are dropped.
  defp link_position(url, line) do
    case :binary.match(line, url) do
      {start, _length} -> [{url, start + 1}]
      :nomatch -> []
    end
  end

  # Classifies a link and checks it. Returns `{:valid, nil}`,
  # `{:redirected, {url, location, file, line, column}}` or
  # `{:invalid, {url, file, line, column, reason}}`.
  defp check_link(
         {url, source_file, line_number, column},
         project_root,
         extras,
         module_to_package_map
       ) do
    cond do
      String.match?(url, ~r/^[tc]:[\w\.]+\/\d+$/) ->
        case convert_doc_reference_to_url(url, module_to_package_map) do
          {:ok, converted_url} ->
            check_link(
              {converted_url, source_file, line_number, column},
              project_root,
              extras,
              module_to_package_map
            )

          :error ->
            {:invalid, {url, source_file, line_number, column, "Invalid doc reference format"}}
        end

      url_with_scheme?(url) ->
        case check_remote_link(url) do
          {:valid, nil} ->
            {:valid, nil}

          {:redirected, final_url} ->
            {:redirected, {url, final_url, source_file, line_number, column}}

          {:invalid, reason} ->
            {:invalid, {url, source_file, line_number, column, reason}}
        end

      # A scheme without a host (`mailto:`, `tel:`, ...) is neither fetchable nor
      # a file path, so it is accepted without a check.
      URI.parse(url).scheme != nil ->
        {:valid, nil}

      String.starts_with?(url, "/") ->
        if check_local_file(url, project_root, extras) do
          {:valid, nil}
        else
          {:invalid, {url, source_file, line_number, column, "Local file not found"}}
        end

      true ->
        if check_relative_file(url, source_file, project_root, extras) do
          {:valid, nil}
        else
          {:invalid, {url, source_file, line_number, column, "Relative file not found"}}
        end
    end
  end

  defp url_with_scheme?(url) do
    uri = URI.parse(url)
    uri.scheme != nil && uri.host != nil
  end

  # Fetches the URL and maps the response to `{:valid, nil}`,
  # `{:redirected, location}` or `{:invalid, reason}`.
  #
  # NOTE(review): Req appears to follow 301/302/303/307/308 redirects by
  # default, in which case those statuses rarely reach the 3xx clause below.
  # If redirects are meant to be reported, confirm and disable redirect
  # following (the `:follow_redirects` option in Req 0.3.x).
  defp check_remote_link(url) do
    case Req.get(url, max_retries: 2, retry_delay: 500, receive_timeout: 5_000) do
      {:ok, %{status: status}} when status in 200..299 ->
        {:valid, nil}

      {:ok, %{status: status, headers: headers}} when status in 300..399 ->
        # A 3xx response is not guaranteed to carry a Location header.
        case Enum.find(headers, fn {name, _value} -> String.downcase(name) == "location" end) do
          {_name, location} -> {:redirected, location}
          nil -> {:invalid, "HTTP #{status} without Location header"}
        end

      {:ok, %{status: status}} ->
        {:invalid, "HTTP #{status}"}

      {:error, exception} ->
        {:invalid, Exception.message(exception)}
    end
  end

  # True when an absolute link resolves to a file below the project root, below
  # `documentation/`, or to an extra with the same file name.
  defp check_local_file(path, project_root, extras) do
    target = strip_fragment_and_query(path)

    [
      Path.join(project_root, target),
      Path.join([project_root, "documentation", target])
      | matching_extras(target, project_root, extras)
    ]
    |> Enum.any?(&File.exists?/1)
  end

  # True when a relative link resolves next to the source file, below
  # `documentation/`, or to an extra with the same file name.
  defp check_relative_file(path, source_file, project_root, extras) do
    case strip_fragment_and_query(path) do
      # A bare `#fragment` points into the source file itself, which was just read.
      "" ->
        true

      target ->
        [
          Path.join([project_root, Path.dirname(source_file), target]),
          Path.join([project_root, "documentation", target])
          | matching_extras(target, project_root, extras)
        ]
        |> Enum.any?(&File.exists?/1)
    end
  end

  # Drops `#fragment` and `?query` so that only the file part is looked up.
  defp strip_fragment_and_query(link) do
    link |> String.split(["#", "?"], parts: 2) |> hd()
  end

  # Only extras whose file name equals the link's file name are candidates.
  # Adding every extra here would make the existence check pass for any link.
  defp matching_extras(target, project_root, extras) do
    basename = Path.basename(target)

    for extra <- extras, Path.basename(extra) == basename do
      Path.join(project_root, extra)
    end
  end

  # Builds a map from package name (string) to package (atom) out of the
  # project's app and dependencies. Keys are strings so that lookups never
  # have to create atoms from text found in the documentation.
  defp create_module_to_package_map(project) do
    deps = project |> Keyword.get(:deps, []) |> Enum.map(&dep_name/1)

    [Keyword.get(project, :app) | deps]
    |> Enum.reject(&is_nil/1)
    |> Map.new(&{Atom.to_string(&1), &1})
  end

  defp dep_name({name, _requirement_or_opts}) when is_atom(name), do: name
  defp dep_name({name, _requirement, _opts}) when is_atom(name), do: name
  defp dep_name(name) when is_atom(name), do: name

  # Converts `t:Mod.name/arity` or `c:Mod.name/arity` into a hexdocs.pm URL.
  # Returns `:error` when the reference has no module part.
  defp convert_doc_reference_to_url(reference, module_to_package_map) do
    with [_, type, path, arity] <- Regex.run(~r/^([tc]):(.+)\/(\d+)$/, reference),
         {module_segments, [function]} when module_segments != [] <-
           path |> String.split(".") |> Enum.split(-1) do
      module = Enum.join(module_segments, ".")
      guessed = guess_package(module)
      # The map currently maps every known package to itself, so this lookup
      # yields the guessed name either way; it is the place to plug in real
      # module-to-package data.
      package = Map.get(module_to_package_map, guessed, guessed)

      {:ok, "https://hexdocs.pm/#{package}/#{module}.html##{type}:#{function}/#{arity}"}
    else
      _ -> :error
    end
  end

  # Guesses the package from the first module segment ("Foo.Bar" -> "foo").
  defp guess_package(module) do
    module |> String.split(".") |> hd() |> Macro.underscore()
  end
end
# Run the checker and separate broken links from redirected ones.
{invalid_links, redirected_links} =
  Enum.split_with(DocLinkChecker.check_links(), fn
    {:invalid, _} -> true
    {:redirected, _} -> false
  end)

# One printer per result kind; both write a `file:line:column:` prefixed line.
print_invalid = fn {:invalid, {link, source, line_number, column, reason}} ->
  IO.puts(
    "#{source}:#{line_number}:#{column}: Invalid link: #{link} - Reason: #{inspect(reason)}"
  )
end

print_redirected = fn {:redirected, {original_url, final_url, source, line_number, column}} ->
  IO.puts("#{source}:#{line_number}:#{column}: Redirected link: #{original_url} -> #{final_url}")
end

case {invalid_links, redirected_links} do
  {[], []} ->
    IO.puts("All links are valid!")

  _ ->
    if invalid_links != [] do
      IO.puts("\nThe following links are invalid:")
      Enum.each(invalid_links, print_invalid)
    end

    if redirected_links != [] do
      IO.puts("\nThe following links are redirected:")
      Enum.each(redirected_links, print_redirected)
    end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment