Created
May 6, 2023 05:56
-
-
Save petermueller/a6678e1e620fa009b7bf4ec9b1455009 to your computer and use it in GitHub Desktop.
Content-Disposition Library ideas?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule HttpUtils.Download do | |
@moduledoc """ | |
A module for interacting with the `MyApp.Downloadable` protocol in a web context. | |
This module contains a collection of functions for common use cases,
such as sending chunked streams on a `t:Plug.Conn.t/0`.
""" | |
alias MyApp.Downloadable | |
# RFC 2616 Section 2.2
# Clarified by RFC 6266
# ====
# OCTET = <any 8-bit sequence of data>
# CHAR = <any US-ASCII character (octets 0 - 127)>
# UPALPHA = <any US-ASCII uppercase letter "A".."Z">
# LOALPHA = <any US-ASCII lowercase letter "a".."z">
# ALPHA = UPALPHA | LOALPHA
# DIGIT = <any US-ASCII digit "0".."9">
# CTL = <any US-ASCII control character
# (octets 0 - 31) and DEL (127)>
# CR = <US-ASCII CR, carriage return (13)>
# LF = <US-ASCII LF, linefeed (10)>
# SP = <US-ASCII SP, space (32)>
# HT = <US-ASCII HT, horizontal-tab (9)>
# <"> = <US-ASCII double-quote mark (34)>
# LWS = [CRLF] 1*( SP | HT )
# TEXT = <any OCTET except CTLs, but including LWS>
# quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
# qdtext = <any TEXT except <">>
# quoted-pair = "\" CHAR
# ====
# Intentionally not including CRLF-prefixed LWS, or quoted-pair, as they require multi-char
# matching, which would be better served by using an actual parser, a la NimbleParsec.
# The spec also defines TEXT in terms of OCTET, but RFC 6266 explicitly suggests substituting
# letters like "ä" (Latin Small Letter A With Diaeresis) with US-ASCII "ae", even though it is a
# valid ISO-8859-1 character, octet 228 (but not US-ASCII). Based on this we're ignoring the part
# of the spec that says "TEXT = <any OCTET ..." and assuming they meant "<any CHAR ...".
#
# Backslash (octet 92) is excluded as well: inside a quoted-string it introduces a quoted-pair,
# so emitting it unescaped would make a compliant parser drop it and take the following character
# literally. RFC 9110 Section 5.6.4 leaves %x5C out of qdtext for the same reason. Since we never
# emit quoted-pairs, a backslash is treated like any other unsupported character.
@text_chars Enum.to_list(32..126)
@qdtext_chars @text_chars -- [?", ?\\]
@quoted_string_chars @qdtext_chars
# Stored as single-character strings so membership can be tested against `String.codepoints/1`.
@mapset_quoted_string_strings MapSet.new(@quoted_string_chars, &List.to_string([&1]))
# RFCs 5987 & 8187, Sections 3.2.1
# ====
# ext-value = charset "'" [ language ] "'" value-chars
# ; like RFC 2231's <extended-initial-value>
# ; (see [RFC2231], Section 7)
# Parameter extension value charset
# charset = "UTF-8" / mime-charset
# mime-charset = 1*mime-charsetc
# mime-charsetc = ALPHA / DIGIT
# / "!" / "#" / "$" / "%" / "&"
# / "+" / "-" / "^" / "_" / "`"
# / "{" / "}" / "~"
# ; as <mime-charset> in Section 2.3 of [RFC2978]
# ; except that the single quote is not included
# ; SHOULD be registered in the IANA charset registry
# @rfc_5987_parameter_extension_custom_charset_chars '!#$%&+-^_`{}~'
# value-chars = *( pct-encoded / attr-char )
# pct-encoded = "%" HEXDIG HEXDIG
# ; see [RFC3986], Section 2.1
# attr-char = ALPHA / DIGIT
# / "!" / "#" / "$" / "&" / "+" / "-" / "."
# / "^" / "_" / "`" / "|" / "~"
# ; token except ( "*" / "'" / "%" )
# ====
# The attr-char set above, as a flat list of character codes: these are the only characters
# that are left unescaped in a `filename*=` value.
@rfc_5987_parameter_extension_value_chars Enum.concat([
                                            ?A..?Z,
                                            ?a..?z,
                                            ?0..?9,
                                            ~c"!#$&+-.^_`|~"
                                          ])
# Set form of the list above, used for membership checks while percent-encoding.
@mapset_param_ext_value_chars MapSet.new(@rfc_5987_parameter_extension_value_chars)
# Types

# `disposition_atom/0` is public because the public `disposition/0` type and the public specs in
# this module refer to it; a private type there would be hidden from generated documentation.
@typedoc "The disposition types supported by this module, as atoms."
@type disposition_atom() :: :inline | :attachment

@typedoc "A disposition type given either as a supported atom or as a case-insensitive string."
@type disposition() :: disposition_atom() | String.t()
# Public Functions

# Renders a bare disposition type (no filename parameters) as a header value.
# The argument is validated by `disposition_type/1`, which raises for unsupported values.
@doc false
@spec content_disposition(disposition()) :: String.t()
def content_disposition(disposition) do
  type = disposition_type(disposition)
  Atom.to_string(type)
end
@doc ~S"""
Builds a standards-compliant `Content-Disposition` header value from the given options.

Raises if given an unsupported disposition as an atom or case-insensitive string.

See the implementation comments for more context into the RFCs, and specific characters left
unescaped.

## Arguments

* `disposition` - The disposition type to use
* `downloadable` - A `t:MyApp.Downloadable.t/1` that is used to gather the base filename and
  extension. This argument is optional and when passed will be encoded for the "filename*="
  header parameter. For the "filename=" legacy header parameter, any non-US-ASCII characters
  (interpreted as codepoints) will be replaced with `_` to support older browsers.

## Examples

    iex> Download.content_disposition(:inline)
    "inline"

    iex> Download.content_disposition(:attachment)
    "attachment"

    iex> Download.content_disposition(%MyApp.ZipPdfDownload{filename: "kittens"}, "inline")
    "inline; filename=\"kittens.zip\"; filename*=UTF-8''kittens.zip"

    iex> Download.content_disposition(%MyApp.ZipPdfDownload{filename: "kïttéñs"}, :attachment)
    "attachment; filename=\"k_tt__s.zip\"; filename*=UTF-8''k%C3%AFtt%C3%A9%C3%B1s.zip"

    iex> Download.content_disposition("form-data")
    ** (ArgumentError) form-data unsupported, use `Plug.Parsers.MULTIPART`

    iex> Download.content_disposition("filename=\"myfile.txt\"")
    ** (ArgumentError) invalid disposition type: `"filename=\"myfile.txt\""`, use `:inline` or `:attachment`

"""
@spec content_disposition(Downloadable.t(), disposition()) :: String.t()
def content_disposition(downloadable, disposition) do
  # Validate the disposition before touching the downloadable, so bad input raises early.
  type = disposition_type(disposition)

  base_name = Downloadable.filename(downloadable)
  extension = Downloadable.extension(downloadable)
  full_name = base_name <> "." <> extension

  # Legacy `filename=` goes first, followed by the RFC 5987 `filename*=` form.
  "#{type}; #{ascii_filename(full_name)}; #{utf8_filename(full_name)}"
end
# Normalises a disposition given as an atom or a case-insensitive string to
# `:inline` or `:attachment`.
#
# Raises `ArgumentError` for "form-data" and for any other unrecognised string.
# Terms that are neither a supported atom nor a binary match no clause.
@doc false
@spec disposition_type(disposition()) :: disposition_atom() | no_return()
def disposition_type(disposition) when disposition in [:inline, :attachment], do: disposition

def disposition_type(disposition) when is_binary(disposition) do
  normalized = String.downcase(disposition)

  cond do
    normalized == "attachment" ->
      :attachment

    normalized == "inline" ->
      :inline

    normalized == "form-data" ->
      raise ArgumentError, "form-data unsupported, use `Plug.Parsers.MULTIPART`"

    true ->
      # Report the value exactly as the caller passed it, not the downcased copy.
      raise ArgumentError,
            "invalid disposition type: `#{inspect(disposition)}`, use `:inline` or `:attachment`"
  end
end
# Builds the RFC 5987 `filename*=` parameter: every byte outside the attr-char set is
# percent-encoded and the result is prefixed with the UTF-8 charset and an empty language tag.
@doc false
@spec utf8_filename(String.t()) :: String.t()
def utf8_filename(filename) do
  unescaped? = fn byte -> MapSet.member?(@mapset_param_ext_value_chars, byte) end
  "filename*=UTF-8''" <> URI.encode(filename, unescaped?)
end
# Builds the legacy `filename="..."` parameter from the `to_ascii/1` rendering of the name.
@doc false
@spec ascii_filename(String.t()) :: String.t()
def ascii_filename(filename) do
  ~s(filename=") <> to_ascii(filename) <> ~s(")
end
# Replaces every codepoint that is not in the allowed quoted-string character set with "_",
# leaving allowed characters untouched.
@doc false
@spec to_ascii(String.t()) :: String.t()
def to_ascii(utf8) do
  utf8
  |> String.codepoints()
  |> Enum.map_join("", fn codepoint ->
    if MapSet.member?(@mapset_quoted_string_strings, codepoint), do: codepoint, else: "_"
  end)
end
@typedoc """
Result of a parsed Content-Disposition header
"""
@type parsed_content_disposition() :: %{
        optional(:filename_utf8) => String.t(),
        optional(:legacy_filename) => String.t(),
        required(:disposition) => disposition_atom()
      }

@doc ~S"""
Parses a [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition)
based upon the IETF RFCs, with some room for incorrect encoding from the sender.

Returns a `t:parsed_content_disposition/0` map.

Raises if given an unsupported disposition type. Whitespace around the disposition type
(e.g. `"attachment ; filename=..."`) is ignored.

## Important Notes on the `t:parsed_content_disposition/0` result

- only the `:disposition` key will always be present.
- when consuming, it is HIGHLY suggested to use the `:filename_utf8` field if present
- `:legacy_filename` is intentionally NOT decoded.
- No path cleanup is done. Treat the values as unsafe, like any other external input

See the implementation comments for more context into the RFCs, and specific characters.

## Examples

    iex> Download.parse_content_disposition("inline")
    %{disposition: :inline}

    iex> Download.parse_content_disposition("attachment")
    %{disposition: :attachment}

    iex> Download.parse_content_disposition("attachment; filename*=UTF-8''k%C3%AFtt%C3%A9%C3%B1.jpg")
    %{filename_utf8: "kïttéñ.jpg", disposition: :attachment}

    iex> Download.parse_content_disposition("inline; filename=\"kitten.jpg\"; filename*=UTF-8''kitten.jpg")
    %{filename_utf8: "kitten.jpg", legacy_filename: "kitten.jpg", disposition: :inline}

    # Doesn't try to decode legacy filenames
    iex> Download.parse_content_disposition("attachment; filename=\"k%3Ftt%3F%3F.jpg\"")
    %{legacy_filename: "k%3Ftt%3F%3F.jpg", disposition: :attachment}

    # Tolerates whitespace around the disposition type
    iex> Download.parse_content_disposition("attachment ; filename=\"kitten.jpg\"")
    %{legacy_filename: "kitten.jpg", disposition: :attachment}

"""
@spec parse_content_disposition(String.t()) :: parsed_content_disposition()
def parse_content_disposition(header_value) do
  # Only the first ";" is split on: everything after it is the parameter list.
  {raw_type, raw_params} =
    case :binary.split(header_value, ";") do
      [type, params] -> {type, params}
      [type] -> {type, ""}
    end

  # The header grammar allows linear whitespace between the disposition type and ";",
  # so trim before validating instead of rejecting e.g. "attachment ; filename=...".
  disposition = raw_type |> String.trim() |> disposition_type()

  params = Plug.Conn.Utils.params(raw_params)
  raw_filename_star = params["filename*"] || ""

  # Only UTF-8 extended values are decoded; any other charset is ignored.
  filename_utf8? =
    raw_filename_star
    |> String.trim_leading()
    |> String.downcase(:ascii)
    |> String.starts_with?("utf-8")

  filename_utf8 =
    with true <- filename_utf8?,
         {:ok, encoded} <-
           strip_utf8_str_and_language_tag_from_parameter_extension(raw_filename_star),
         decoded = URI.decode(encoded),
         # Percent-decoding can yield arbitrary bytes; drop anything that is not valid UTF-8.
         true <- String.valid?(decoded) do
      decoded
    else
      _ -> nil
    end

  # Absent values are omitted from the result rather than returned as nil.
  [
    filename_utf8: filename_utf8,
    legacy_filename: params["filename"],
    disposition: disposition
  ]
  |> Enum.reject(fn {_key, value} -> is_nil(value) end)
  |> Map.new()
end
# Private functions

# Extracts the still percent-encoded filename from a `filename*` extended value of the form
# `UTF-8'<language tag>'<value>` (charset matched case-insensitively, surrounding whitespace
# ignored).
#
# Returns `{:ok, filename}` or `{:error, reason}` when the value does not have that shape or the
# filename part is empty.
#
# This could also be a split on single-quote, "'", based on the spec, but a regex is a bit more
# forgiving of bad encoding. The language tag is matched as "anything up to the next single
# quote", so a stray unencoded "'" in the filename stays in the filename. A greedy match here
# would run to the LAST quote and silently drop the start of the name.
defp strip_utf8_str_and_language_tag_from_parameter_extension(raw_string) do
  pattern = ~r/^utf-8'(?<lang_tag>[^']*)'(?<filename>.+)/i

  case Regex.named_captures(pattern, String.trim(raw_string)) do
    # `.+` guarantees a non-empty filename whenever the pattern matches.
    %{"filename" => filename} -> {:ok, filename}
    nil -> {:error, "failed to extract the filename* parameter"}
  end
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Corresponding discussion in ElixirForum:
https://elixirforum.com/t/library-for-parsing-and-building-rfc-compliant-content-disposition-headers/55708