Skip to content

Instantly share code, notes, and snippets.

@chansen
Created October 26, 2013 00:47
Show Gist options
  • Save chansen/7163968 to your computer and use it in GitHub Desktop.
Save chansen/7163968 to your computer and use it in GitHub Desktop.
Extracts name and filename values from Content-Disposition header in multipart/form-data
#!/usr/bin/perl
use strict;
use warnings;
# <https://bugzilla.mozilla.org/show_bug.cgi?id=136676>
# <https://bugs.webkit.org/show_bug.cgi?id=62107>
# <https://www.w3.org/Bugs/Public/show_bug.cgi?id=16909>
# Pull the "name" and "filename" parameters out of the Content-Disposition
# header of a multipart/form-data part.
#
# Takes the header value as its only argument.  In list context returns
# (name, filename); filename is undef when that parameter is absent.  Values
# are handed back exactly as they appear in the header (still escaped),
# because browsers disagree on how they escape them (see the cases below).
# Returns an empty list when the disposition type is not form-data, the
# header cannot be parsed, "name" is missing, or "name"/"filename" occurs
# more than once.
sub extract_form_data {
    my $rest = shift;

    # Common case: the strict `form-data; name="..."; filename="..."` layout.
    return ($1, $2)
        if $rest =~ /\A form-data; \s name="((?:[^"]|\\")*)" (?: ;\s filename="((?:[^"]|\\")*)" )? \z/x;

    # Tolerant path: strip the disposition type, which has to be form-data.
    return
        unless $rest =~ s/\A \s* form-data \s* ; //xi;

    my %seen;
    PARAM:
    while (length $rest) {
        $rest =~ s/ ^ \s+ //x;
        $rest =~ s/ \s+ $ //x;

        # An empty parameter, or a bare token terminated by ';', is dropped.
        next PARAM if $rest =~ s/^ [^\s"=;]* \s* ; //x;

        # Anything else has to start with "token =".
        $rest =~ s/^ ([^\s"=;]+) \s* = \s* //x
            or return;
        my $param = lc $1;

        # Try a quoted value first, then fall back to an unquoted token;
        # $1 holds the capture of whichever substitution succeeded.
        $rest =~ s/^ "((?:[^"]|\\")*)" \s* (?: ; | $) //x
            or $rest =~ s/^ ([^\s";]*) \s* (?: ; | $) //x
            or return;
        my $value = $1;

        # Extension parameters are parsed (so they are consumed) but ignored.
        next PARAM if $param ne 'name' && $param ne 'filename';

        # A repeated name/filename parameter makes the header invalid.
        return if exists $seen{$param};
        $seen{$param} = $value;
    }

    return unless exists $seen{name};
    return @seen{qw(name filename)};
}
# Test cases for extract_form_data().  Each entry is
#   [ header value, [ expected name, expected filename ] ]
# where an undef filename means the parameter was absent and an empty
# expected list means the header must be rejected (nothing returned).
my @tests = (
[ q<form-data; name="foo">,
[ q<foo>, undef ]
],
[ q<form-data; name="">,
[ q<>, undef ]
],
[ q<form-data; name=""; filename="">,
[ q<>, q<> ]
],
[ q<form-data; name="foo"; filename="">,
[ q<foo>, q<> ]
],
[ q<form-data; name=""; filename="foo.ext">,
[ q<>, q<foo.ext> ]
],
[ q<form-data; name="Foo"; filename="doc.ext">,
[ q<Foo>, q<doc.ext> ]
],
[ q<form-data; name="foO"; filename="bar baz.ext">,
[ q<foO>, q<bar baz.ext> ]
],
[ q<form-data; name="FoO"; filename="b\az.ext">,
[ q<FoO>, q<b\az.ext> ]
],
[ q<form-data; name="FOO"; filename="\"quoted\" bar.ext">,
[ q<FOO>, q<\"quoted\" bar.ext> ]
],
[ q<form-data; name="foo"; filename="foo;bar;baz.ext">,
[ q<foo>, q<foo;bar;baz.ext> ]
],
[ q<form-data; name="foo"; filename="foo\\bar||baz\\ext">,
[ q<foo>, q<foo\\bar||baz\\ext> ]
],
[ q<form-data; name="foo"; filename="'bar.ext'">,
[ q<foo>, q<'bar.ext'> ]
],
[ q<form-data; filename="foo"; name="bar">,
[ q<bar>, q<foo> ]
],
[ q<form-data; name="=name"; filename="=filename.ext">,
[ q<=name>, q<=filename.ext> ]
],
[ q<form-data; name="foo"; filename="/path/filename.ext">,
[ q<foo>, q</path/filename.ext> ]
],
# The disposition type and the parameter names are case-insensitive.
[ q<FORM-DATA; name="foo"; FiLeNaMe="bar">,
[ q<foo>, q<bar> ]
],
[ q<form-data; NamE="foo">,
[ q<foo>, undef ]
],
# Unquoted parameter values.
[ q<form-data; name=foo>,
[ q<foo>, undef ]
],
[ q<form-data; name=foo; filename=>,
[ q<foo>, q<> ]
],
[ q<form-data; name=foo; filename=baz.ext>,
[ q<foo>, q<baz.ext> ]
],
[ q<form-data; name=foo; filename=foo-bar+baz.ext>,
[ q<foo>, q<foo-bar+baz.ext> ]
],
# Excessive linear whitespace (LWS).
[ q< form-data ; name = "foo" >,
[ q<foo>, undef ]
],
[ q< form-data ; name = foo >,
[ q<foo>, undef ]
],
[ q< form-data ; name = "foo" ; filename = "baz" >,
[ q<foo>, q<baz> ]
],
[ q< form-data ; name = "foo" ; filename = >,
[ q<foo>, q<> ]
],
# No linear whitespace at all.
[ q<form-data;name="foo">,
[ q<foo>, undef ]
],
[ q<form-data;name=foo>,
[ q<foo>, undef ]
],
[ q<form-data;name="foo";filename="baz">,
[ q<foo>, q<baz> ]
],
[ q<form-data;name="foo";filename=>,
[ q<foo>, q<> ]
],
# Extension parameters are ignored, even when they are repeated.
[ q<form-data; name="foo"; baz="foo">,
[ q<foo>, undef ]
],
[ q<form-data; name="foo"; baz="foo"; baz="bar">,
[ q<foo>, undef ]
],
[ q<form-data; name="foo"; filename="bar"; baz="foo"; baz="bar">,
[ q<foo>, q<bar> ]
],
# Empty parameters and unknown tokens (terminated by ';') are ignored.
[ q<form-data; name=foo; ;; baz=foo ; >,
[ q<foo>, undef ]
],
[ q<form-data; name=foo; baz ; baz=foo ; >,
[ q<foo>, undef ]
],
# WebKit-based browsers percent-encode double-quote marks but do not
# percent-encode percent characters or backslash characters.
# " Foo %22 " => %22 Foo %22 %22
# " \" " => %22 \%22 %22
[ q<form-data; name="foo"; filename="%22 Foo %22 %22">,
[ q<foo>, q<%22 Foo %22 %22> ]
],
[ q<form-data; name="foo"; filename="%22 \%22 %22">,
[ q<foo>, q<%22 \%22 %22> ]
],
# Firefox backslash-quotes double-quote marks but does not quote backslash
# characters.
# " Foo " => \" Foo \"
# " \" " => \" \\" \"
[ q<form-data; name="foo"; filename="\" Foo \"">,
[ q<foo>, q<\" Foo \"> ]
],
[ q<form-data; name="foo"; filename="\" \\" \"">,
[ q<foo>, q<\" \\" \"> ]
],
# IE may provide a path containing backslash characters but does not
# encode or quote the backslash character.
[ q<form-data; name="foo"; filename="C:\Documents and Settings\user\Documents\file.ext">,
[ q<foo>, q<C:\Documents and Settings\user\Documents\file.ext> ]
],
# Some old browsers do not escape double-quote marks; a value containing a
# bare double quote is expected to be rejected.
# <https://bugzilla.mozilla.org/show_bug.cgi?id=136676>
[ q<form-data; name=" foo " ">,
[ ],
],
# The disposition type must be form-data.
[ q<attachment; name="foo">,
[ ]
],
[ q<attachment; name="foo"; filename="baz">,
[ ]
],
# The name parameter is required.
[ q<form-data;">,
[ ]
],
[ q<form-data; filename="bar">,
[ ]
],
# The name parameter is specified more than once.
[ q<form-data; name="foo"; name="bar"; filename="baz">,
[ ]
],
# The filename parameter is specified more than once.
[ q<form-data; name="foo"; filename="bar"; filename="baz">,
[ ]
],
# A trailing parameter name without '=' and a value is rejected.
[ q<form-data; name>,
[ ]
]
);
use Test::More;
# Run every case: call extract_form_data() in list context and compare the
# returned list with the expected one; the header itself is the test name.
for my $case (@tests) {
    my ($header, $expected) = @{$case};
    my @actual = extract_form_data($header);
    is_deeply(\@actual, $expected, "<$header>");
}
done_testing();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment