Last active
June 23, 2019 00:52
-
-
Save chulkilee/df9a19214a9615582c726dffba4dff34 to your computer and use it in GitHub Desktop.
Parsing HTTP Link header with NimbleParsec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule LinkParser do
  @moduledoc """
  Parses the value of an HTTP `Link` header into a list of links.

  Two implementations are provided and are expected to agree on well-formed
  input:

    * `parse/1` - built on NimbleParsec combinators
    * `parse_naive/1` - built on `String.split/2`; it splits on every `,` and
      `;`, so it cannot handle those characters inside a URI or a quoted value

  Each link is returned as `%{uri: binary, params: [{name, value}]}` with the
  params in the order they appear in the header. Parameters without `=`
  (allowed by RFC 8288) are rejected by both implementations.

  References:

  - https://tools.ietf.org/html/rfc8288
  - https://tools.ietf.org/html/rfc5988
  - https://www.iana.org/assignments/link-relations/link-relations.xhtml
  """

  import NimbleParsec

  @typedoc "A single parsed link-value."
  @type link :: %{uri: String.t(), params: [{String.t(), String.t()}]}

  # More permissive than https://tools.ietf.org/html/rfc3986#section-4.1:
  # anything up to the closing ">" is accepted as the URI reference.
  uri = ascii_string([not: ?>], min: 1)

  # token = 1*tchar, see https://tools.ietf.org/html/rfc7230#section-3.2.6
  tchar = [?a..?z, ?A..?Z, ?0..?9] ++ ~c"!#$%&'*+-.^_`|~"
  token = ascii_string(tchar, min: 1)

  dquote = ignore(string("\""))

  # Text inside a quoted-string: everything except the closing quote and the
  # backslash that introduces an escape.
  qdtext = ascii_string([not: ?", not: ?\\], min: 1)

  # Only the escaped double quote is unescaped, mirroring parse_value/1 in the
  # naive implementation.
  escaped_dquote = string(~S(\")) |> replace(~S("))

  quoted_string =
    dquote
    |> repeat(choice([qdtext, escaped_dquote]))
    |> concat(dquote)

  ows = ignore(repeat(string(" ")))
  bws = ows

  # link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
  # The "=" and value are mandatory here, unlike in the RFC grammar.
  link_param =
    bws
    |> concat(token)
    |> unwrap_and_tag(:param_name)
    |> concat(bws)
    |> ignore(string("="))
    |> concat(bws)
    |> choice([token, quoted_string])
    |> tag(:param_value)
    |> post_traverse(:build_link_param)
    |> unwrap_and_tag(:param)

  # link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
  link_value =
    ows
    |> ignore(string("<"))
    |> concat(uri)
    |> unwrap_and_tag(:uri)
    |> ignore(string(">"))
    |> repeat(
      ows
      |> ignore(string(";"))
      |> concat(ows)
      |> concat(link_param)
    )
    |> post_traverse(:build_link_value)

  # Link = #link-value
  link_values =
    link_value
    |> repeat(
      ows
      |> ignore(string(","))
      |> concat(ows)
      |> concat(link_value)
    )
    |> concat(ows)
    |> eos()

  defparsecp :parse_string, link_values

  # NOTE(review): both post_traverse callbacks below return `{args, context}`.
  # Newer NimbleParsec releases appear to expect `{rest, args, context}` -
  # confirm against the version pinned in mix.exs before upgrading.

  # Collapses the tagged results of one link-value into a single map. The
  # accumulated results arrive newest-first, hence the reverse on the params.
  defp build_link_value(_rest, args, context, _line, _offset) do
    uri = Keyword.fetch!(args, :uri)
    params = args |> Keyword.get_values(:param) |> Enum.reverse()
    {[%{uri: uri, params: params}], context}
  end

  # Turns `{:param_value, [{:param_name, name} | value_chunks]}` into
  # `{name, value}`; a quoted value may arrive as several chunks because
  # escaped quotes are matched separately.
  defp build_link_param(_rest, args, context, _line, _offset) do
    pairs =
      Enum.map(args, fn {:param_value, [{:param_name, name} | chunks]} ->
        {name, Enum.join(chunks, "")}
      end)

    {pairs, context}
  end

  @doc """
  Parses a `Link` header value with the NimbleParsec based parser.

  Returns `{:ok, links}` when the whole string is consumed, otherwise
  `{:error, "failed to parse"}`.

  ## Examples

      iex> LinkParser.parse(~S(<https://example.com>; rel="next"))
      {:ok, [%{uri: "https://example.com", params: [{"rel", "next"}]}]}

  """
  @spec parse(String.t()) :: {:ok, [link]} | {:error, String.t()}
  def parse(str) do
    case parse_string(str) do
      {:ok, parsed, "", _, _, _} -> {:ok, parsed}
      _ -> {:error, "failed to parse"}
    end
  end

  @doc """
  Parses a `Link` header value using plain string splitting.

  The header is split on `,` into link-values and each link-value on `;` into
  a URI part followed by parameters. Returns `{:ok, links}` or
  `{:error, reason}` where `reason` is one of `"bad uri part"`,
  `"bad param part"` or `"failed to parse param value"`.

  ## Examples

      iex> LinkParser.parse_naive("<foo>; rel=bar")
      {:ok, [%{uri: "foo", params: [{"rel", "bar"}]}]}

      iex> LinkParser.parse_naive("<foo>; rel")
      {:error, "bad param part"}

  """
  @spec parse_naive(String.t()) :: {:ok, [link]} | {:error, String.t()}
  def parse_naive(str) do
    str
    |> String.split(",")
    |> collect_ok(&parse_naive_link/1)
  end

  # Parses one "<uri>; k=v; ..." segment into a link map.
  defp parse_naive_link(link_value) do
    # String.split/2 never returns an empty list, so this match cannot fail.
    [uri_part | param_parts] = link_value |> String.split(";") |> Enum.map(&String.trim/1)

    with {:ok, uri} <- parse_uri_part(uri_part),
         {:ok, params} <- parse_params_part(param_parts) do
      {:ok, %{uri: uri, params: params}}
    end
  end

  # Strips the surrounding angle brackets from the URI part.
  defp parse_uri_part("<" <> rest) do
    case remove_suffix(rest, ">") do
      {:ok, uri} -> {:ok, uri}
      _ -> {:error, "bad uri part"}
    end
  end

  defp parse_uri_part(_other), do: {:error, "bad uri part"}

  # Parses every "name=value" chunk, stopping at the first malformed one.
  defp parse_params_part(params_part), do: collect_ok(params_part, &parse_param/1)

  # Parses a single "name=value" chunk into `{name, value}`.
  defp parse_param(param_part) do
    # Split on the first "=" only so a value may itself contain "=".
    with [key, value] <-
           param_part |> String.split("=", parts: 2) |> Enum.map(&String.trim/1),
         {:ok, value} <- parse_value(value) do
      {:ok, {key, value}}
    else
      {:error, _reason} = error -> error
      _ -> {:error, "bad param part"}
    end
  end

  # A value starting with a double quote must also end with one; the quotes
  # are dropped and escaped quotes inside are unescaped. Anything else is
  # returned untouched.
  defp parse_value("\"" <> after_dquote) do
    case remove_suffix(after_dquote, "\"") do
      {:ok, inside_quote} -> {:ok, String.replace(inside_quote, ~S(\"), ~S("))}
      _ -> {:error, "failed to parse param value"}
    end
  end

  defp parse_value(value), do: {:ok, value}

  # Returns `{:ok, binary_without_suffix}` when `binary` ends with `suffix`.
  defp remove_suffix(binary, suffix) do
    suffix_size = byte_size(suffix)
    prefix_size = byte_size(binary) - suffix_size

    # The size check must come first: binary_part/3 raises on a negative start.
    if prefix_size >= 0 and binary_part(binary, prefix_size, suffix_size) == suffix do
      {:ok, binary_part(binary, 0, prefix_size)}
    else
      {:error, "does not have suffix"}
    end
  end

  # Applies `fun` to each item, gathering the `{:ok, value}` results in order
  # and short-circuiting on the first `{:error, reason}`.
  defp collect_ok(items, fun) do
    result =
      Enum.reduce_while(items, [], fn item, acc ->
        case fun.(item) do
          {:ok, value} -> {:cont, [value | acc]}
          {:error, _reason} = error -> {:halt, error}
        end
      end)

    case result do
      {:error, _reason} = error -> error
      values -> {:ok, Enum.reverse(values)}
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule LinkParserTest do
  use ExUnit.Case, async: true

  # Each test lists several spellings of the same header value; all of them
  # must produce the same links from both LinkParser.parse/1 and
  # LinkParser.parse_naive/1.

  test "single value without param" do
    links = [%{uri: "foo", params: []}]

    assert_parses_to(links, ["<foo>", " <foo> ", "<foo>", "<foo> "])
  end

  test "single value with single param" do
    links = [%{uri: "foo", params: [{"rel", "bar"}]}]

    assert_parses_to(links, [
      "<foo>;rel=bar",
      " <foo> ; rel = bar ",
      ~S( <foo> ; rel= "bar" )
    ])
  end

  test "single value with multiple param" do
    links = [%{uri: "foo", params: [{"rel", "bar"}, {"title", "hello"}]}]

    assert_parses_to(links, [
      "<foo>;rel=bar;title=hello",
      ~S( <foo> ; rel=bar;title="hello")
    ])
  end

  test "complicated case" do
    params = [{"rel", "preload"}, {"title", "hello"}, {"escaped", ~S(foo"bar)}]
    links = [%{uri: "https://example.com", params: params}]

    assert_parses_to(links, [
      ~S(<https://example.com> ; rel=preload ; title=hello;escaped="foo\"bar"),
      ~S(<https://example.com>;rel = "preload";title=hello ; escaped="foo\"bar")
    ])
  end

  # Asserts that every input parses to `links` with both implementations.
  defp assert_parses_to(links, inputs) do
    for input <- inputs do
      assert {:ok, links} == LinkParser.parse(input)
      assert {:ok, links} == LinkParser.parse_naive(input)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment