Created
January 7, 2010 15:28
-
-
Save dmitriid/271297 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% @author Bob Ippolito <[email protected]> | |
%% @copyright 2007 Mochi Media, Inc. | |
%% @doc Utilities for parsing multipart/form-data. Shameless rip-off of mochiweb.
-module(webmachine_multipart2). | |
-author('[email protected]'). | |
-export([parse_form/1, parse_form/2]). | |
-export([parse_multipart_request/2]). | |
-export([test/0]). | |
-define(CHUNKSIZE, 4096). | |
-record(mp, {state, boundary, length, buffer, callback, next}). | |
%% TODO: DOCUMENT THIS MODULE. | |
%% @doc Parse a multipart/form-data request into a list of
%% {FieldName, Value} pairs, collecting uploaded files with
%% default_file_handler/2.
parse_form(Req) ->
    parse_form(Req, fun default_file_handler/2).

%% @doc Same as parse_form/1, but uploaded files are fed to FileHandler.
%% FileHandler(Filename, ContentType) must return a fun that is called with
%% each body chunk (returning the next handler fun) and finally with the
%% atom eof (returning the value stored for that field).
parse_form(Req, FileHandler) ->
    %% The parser drives this callback chain; it starts with no fields.
    Start = fun (Event) -> parse_form_outer(Event, FileHandler, []) end,
    {_Remaining, _Trailing, Fields} = parse_multipart_request(Req, Start),
    Fields.
%% @doc Callback state "between parts". On eof the accumulated fields are
%% returned in arrival order. On a {headers, _} event the part is dispatched
%% to the plain-value or file callback, depending on whether the
%% content-disposition header carries a "filename" parameter.
parse_form_outer(eof, _FileHandler, Fields) ->
    lists:reverse(Fields);
parse_form_outer({headers, Headers}, FileHandler, Fields) ->
    {"form-data", Params} = proplists:get_value("content-disposition", Headers),
    FieldName = proplists:get_value("name", Params),
    case proplists:get_value("filename", Params) of
        undefined ->
            %% Ordinary form field: accumulate the body chunks in a list.
            fun (Event) ->
                    parse_form_value(Event, {FieldName, []}, FileHandler, Fields)
            end;
        Filename ->
            %% File upload: hand the chunks to a fresh per-file handler.
            ContentType = proplists:get_value("content-type", Headers),
            PartHandler = FileHandler(Filename, ContentType),
            fun (Event) ->
                    parse_form_file(Event, {FieldName, PartHandler}, FileHandler, Fields)
            end
    end.
%% @doc Callback state "inside a plain form field". {body, Chunk} events are
%% stacked (newest first); body_end flattens them into a string, records
%% {Name, Value} and returns to the between-parts state.
parse_form_value({body, Chunk}, {Name, Chunks}, FileHandler, Fields) ->
    Stacked = [Chunk | Chunks],
    fun (Event) ->
            parse_form_value(Event, {Name, Stacked}, FileHandler, Fields)
    end;
parse_form_value(body_end, {Name, Chunks}, FileHandler, Fields) ->
    Joined = iolist_to_binary(lists:reverse(Chunks)),
    Fields1 = [{Name, binary_to_list(Joined)} | Fields],
    fun (Event) ->
            parse_form_outer(Event, FileHandler, Fields1)
    end.
%% @doc Callback state "inside a file part". Each {body, Chunk} is passed to
%% the current handler fun, whose result becomes the next handler. On
%% body_end the handler is called with eof and its result is stored as the
%% field value.
parse_form_file({body, Chunk}, {Name, Handler}, FileHandler, Fields) ->
    %% Call the handler now (not lazily) so chunks are consumed in order.
    NextHandler = Handler(Chunk),
    fun (Event) ->
            parse_form_file(Event, {Name, NextHandler}, FileHandler, Fields)
    end;
parse_form_file(body_end, {Name, Handler}, FileHandler, Fields) ->
    Result = Handler(eof),
    Fields1 = [{Name, Result} | Fields],
    fun (Event) ->
            parse_form_outer(Event, FileHandler, Fields1)
    end.
%% @doc Default upload handler: buffers the whole file in memory.
%% Returns a fun that accepts body chunks one at a time and, when finally
%% called with eof, yields {Filename, ContentType, Contents} where Contents
%% is the concatenation of all chunks as a binary.
default_file_handler(Filename, ContentType) ->
    default_file_handler_1(Filename, ContentType, []).

%% Chunks holds the data received so far, newest first.
default_file_handler_1(Filename, ContentType, Chunks) ->
    fun (Input) ->
            case Input of
                eof ->
                    Contents = iolist_to_binary(lists:reverse(Chunks)),
                    {Filename, ContentType, Contents};
                Chunk ->
                    default_file_handler_1(Filename, ContentType,
                                           [Chunk | Chunks])
            end
    end.
%% @doc Stream-parse the multipart body of Req, reporting events to Callback.
%% Callback is called with {headers, Headers}, {body, Binary}, body_end and
%% finally eof; every call except the last must return the fun to use for
%% the next event. Returns {RemainingLength, RestOfBuffer, CallbackResult}
%% where CallbackResult is whatever the callback returned for eof.
%%
%% The request must carry a content-length header and its body must begin
%% with "--Boundary\r\n" inside the first streamed chunk; anything else
%% fails with a badarg/badmatch.
parse_multipart_request(Req, Callback) ->
    %% TODO: Support chunked?
    ContentLength = list_to_integer(wrq:get_req_header("content-length", Req)),
    ContentType = wrq:get_req_header("content-type", Req),
    Boundary = iolist_to_binary(get_boundary(ContentType)),
    BoundarySize = byte_size(Boundary),
    {FirstChunk, Next} = wrq:stream_req_body(Req, ?CHUNKSIZE),
    %% Strip the opening delimiter; what follows is the first part's headers.
    <<"--", Boundary:BoundarySize/binary, "\r\n", AfterOpening/binary>> = FirstChunk,
    Initial = #mp{%% Every later delimiter is preceded by CRLF.
                  boundary = <<"\r\n--", Boundary/binary>>,
                  length = ContentLength - byte_size(FirstChunk),
                  buffer = AfterOpening,
                  callback = Callback,
                  next = Next},
    feed_mp(headers, Initial).
%% @doc Parse the raw header block of one part (everything before the blank
%% line) into a list of {LowercaseName, ParsedValue} pairs, in order of
%% appearance. An empty block yields [].
%%
%% Lines are separated on CRLF with binary:split/3. The previous hand-rolled
%% loop crashed with a case_clause when the block ended in a lone "\r",
%% because the partial-match result of find_in_binary/2 was not handled;
%% such a trailing "\r" is now simply left on the last line.
%% A line without a colon still fails inside split_header/1.
parse_headers(<<>>) ->
    [];
parse_headers(Binary) ->
    [split_header(Line) || Line <- binary:split(Binary, <<"\r\n">>, [global])].
%% @doc Split one "Name: value" header line (a binary) into
%% {LowercaseName, parse_header(Value)}. Fails with badmatch when the line
%% contains no colon.
split_header(Line) ->
    UpToColon = fun (Char) -> Char =/= $: end,
    {RawName, [$: | RawValue]} = lists:splitwith(UpToColon, binary_to_list(Line)),
    Name = string:to_lower(string:strip(RawName)),
    {Name, parse_header(RawValue)}.
%% @doc Pull the next chunk from the request body stream and append it to the
%% parser buffer, updating the remaining length and the continuation.
%%
%% Raises error(multipart_body_truncated) when the stream is already
%% exhausted (next = done). The parser only asks for more data when the
%% buffer does not yet contain what it is looking for, so reaching this point
%% with no data left means the body is truncated or malformed. Previously the
%% buffer was silently emptied and the state returned, which made
%% feed_mp(body, ...) loop forever on such input.
read_more(#mp{next = done}) ->
    erlang:error(multipart_body_truncated);
read_more(State = #mp{length = Length, buffer = Buffer, next = Next}) ->
    {Data, Next1} = Next(),
    State#mp{length = Length - byte_size(Data),
             buffer = <<Buffer/binary, Data/binary>>,
             next = Next1}.
%% @doc Core parser loop; alternates between two modes.
%%
%% headers: locate the blank line ending the part headers (reading at most
%% one more chunk), report {headers, ParsedHeaders} to the callback and
%% switch to body mode.
%%
%% body: look for the next boundary in the buffer and report the data before
%% it as {body, Data}. At a part boundary, body_end is reported and parsing
%% continues with the next part's headers. At the closing boundary, body_end
%% and eof are reported and {RemainingLength, RestOfBuffer, CallbackResult}
%% is returned. When no complete boundary is in the buffer, the safe prefix
%% is reported and more data is read.
%%
%% The atom 'maybe' is quoted on purpose: unquoted it is a reserved word when
%% the maybe_expr feature is enabled (the default from OTP 27) and the clause
%% would not compile. Quoted and unquoted forms denote the same atom.
feed_mp(headers, State0 = #mp{buffer = Buffer0, callback = Callback}) ->
    Terminator = <<"\r\n\r\n">>,
    {State, HeaderSize} =
        case find_in_binary(Terminator, Buffer0) of
            {exact, Found} ->
                {State0, Found};
            _ ->
                Refilled = read_more(State0),
                %% Assume headers must be less than ?CHUNKSIZE
                {exact, Found} = find_in_binary(Terminator, Refilled#mp.buffer),
                {Refilled, Found}
        end,
    <<RawHeaders:HeaderSize/binary, "\r\n\r\n", Body/binary>> = State#mp.buffer,
    BodyCallback = Callback({headers, parse_headers(RawHeaders)}),
    feed_mp(body, State#mp{buffer = Body, callback = BodyCallback});
feed_mp(body, State = #mp{boundary = Prefix, buffer = Buffer, callback = Callback}) ->
    case find_boundary(Prefix, Buffer) of
        {end_boundary, Start, Skip} ->
            <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
            C1 = Callback({body, Data}),
            C2 = C1(body_end),
            {State#mp.length, Rest, C2(eof)};
        {next_boundary, Start, Skip} ->
            <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
            C1 = Callback({body, Data}),
            feed_mp(headers, State#mp{callback = C1(body_end),
                                      buffer = Rest});
        {'maybe', Start} ->
            %% A boundary may begin at Start: emit what precedes it and keep
            %% the tail buffered until more data settles the question.
            <<Data:Start/binary, Rest/binary>> = Buffer,
            C1 = Callback({body, Data}),
            feed_mp(body, read_more(State#mp{callback = C1,
                                             buffer = Rest}));
        not_found ->
            %% No trace of a boundary: the whole buffer is body data.
            C1 = Callback({body, Buffer}),
            feed_mp(body, read_more(State#mp{callback = C1,
                                             buffer = <<>>}))
    end.
%% @doc Extract the boundary string from a Content-Type header value.
%% Fails with badmatch unless the media type is multipart/form-data, and with
%% case_clause unless a "boundary" parameter is present.
get_boundary(ContentType) ->
    {"multipart/form-data", Params} = parse_header(ContentType),
    case proplists:get_value("boundary", Params) of
        Boundary when is_list(Boundary) ->
            Boundary
    end.
%% @doc Search Data for the non-empty binary Needle.
%% Returns:
%%   {exact, Pos}      - Needle occurs in full, first at byte offset Pos;
%%   {partial, Pos, K} - no full occurrence, but the last K bytes of Data
%%                       (starting at Pos) equal the first K bytes of Needle,
%%                       so a match might complete once more data arrives;
%%   not_found         - neither of the above.
%%
%% The full-match scan uses binary:match/2 instead of testing every offset
%% by hand; results are unchanged.
find_in_binary(Needle, Data) when byte_size(Needle) > 0 ->
    NeedleSize = byte_size(Needle),
    DataSize = byte_size(Data),
    case DataSize - NeedleSize of
        Last when Last < 0 ->
            %% Data is shorter than Needle: only a partial match is possible.
            partial_find(Needle, Data, 0, DataSize);
        Last ->
            case binary:match(Data, Needle) of
                {Pos, _Length} ->
                    {exact, Pos};
                nomatch ->
                    %% Check whether a proper prefix of Needle ends Data.
                    partial_find(Needle, Data, Last + 1, NeedleSize - 1)
            end
    end.

%% Try successively shorter prefixes of Needle (K bytes) against the tail of
%% Data that starts at offset N; callers keep N + K equal to byte_size(Data).
partial_find(_Needle, _Data, _N, 0) ->
    not_found;
partial_find(Needle, Data, N, K) ->
    <<Prefix:K/binary, _/binary>> = Needle,
    case Data of
        <<_Skip:N/binary, Prefix:K/binary>> ->
            {partial, N, K};
        _ ->
            partial_find(Needle, Data, N + 1, K - 1)
    end.
%% @doc Locate the next multipart delimiter in Data. Prefix is the delimiter
%% including its leading CRLF ("\r\n--Boundary").
%% Returns:
%%   {next_boundary, Start, Skip} - a part delimiter starts at Start and is
%%                                  Skip bytes long (Prefix plus CRLF);
%%   {end_boundary, Start, Skip}  - the closing delimiter (Prefix plus
%%                                  "--\r\n") starts at Start;
%%   {'maybe', Start}             - a delimiter may start at Start but Data
%%                                  ends before that can be decided;
%%   not_found                    - Data contains no delimiter at all.
%%
%% An occurrence of Prefix followed by something other than CRLF or "--CRLF"
%% is a false positive. The search now continues past it; previously
%% not_found was returned, so a real delimiter later in the same buffer was
%% passed on as body data.
%%
%% 'maybe' is quoted because the bare word is reserved when the maybe_expr
%% feature is enabled (default from OTP 27); it is the same atom.
find_boundary(Prefix, Data) ->
    find_boundary(Prefix, Data, 0).

%% Offset is the position in Data from which to (re)start the search; all
%% returned positions are relative to the start of Data.
find_boundary(Prefix, Data, Offset) ->
    <<_:Offset/binary, Tail/binary>> = Data,
    PrefixSize = byte_size(Prefix),
    case find_in_binary(Prefix, Tail) of
        {exact, Pos} ->
            Skip = Offset + Pos,
            PrefixSkip = Skip + PrefixSize,
            case Data of
                <<_:PrefixSkip/binary, "\r\n", _/binary>> ->
                    {next_boundary, Skip, PrefixSize + 2};
                <<_:PrefixSkip/binary, "--\r\n", _/binary>> ->
                    {end_boundary, Skip, PrefixSize + 4};
                _ when byte_size(Data) < PrefixSkip + 4 ->
                    %% Underflow
                    {'maybe', Skip};
                _ ->
                    %% False positive: keep looking after this occurrence.
                    find_boundary(Prefix, Data, Skip + 1)
            end;
        {partial, Pos, Length} when (Pos + Length) =:= byte_size(Tail) ->
            %% Underflow
            {'maybe', Offset + Pos};
        _ ->
            not_found
    end.
%% @spec parse_header(string()) -> {Type, [{K, V}]}
%% @doc Parse a Content-Type like header. Returns the lowercased main value
%% together with a property list of its ";"-separated parameters, in order
%% of appearance. Parameter names are lowercased and surrounding double
%% quotes are removed from the values. Parameters lacking a name or an "="
%% are dropped.
parse_header(String) ->
    %% TODO: This is exactly as broken as Python's cgi module.
    %%       Should parse properly like mochiweb_cookies.
    [Type | Params] = [string:strip(Token) || Token <- string:tokens(String, ";")],
    ToPair =
        fun (Param) ->
                case lists:splitwith(fun (Char) -> Char =/= $= end, Param) of
                    {"", _} ->
                        %% Skip anything with no name
                        [];
                    {_, ""} ->
                        %% Skip anything with no value
                        [];
                    {Key, [$= | Val]} ->
                        [{string:to_lower(string:strip(Key)),
                          unquote_header(string:strip(Val))}]
                end
        end,
    {string:to_lower(Type), lists:flatmap(ToPair, Params)}.
%% @doc Remove the surrounding double quotes from a header parameter value
%% and resolve backslash escapes inside it. Values that do not start with a
%% double quote are returned untouched.
unquote_header([$" | Quoted]) ->
    unquote_header(Quoted, []);
unquote_header(Plain) ->
    Plain.

%% Seen collects the unquoted characters in reverse order.
unquote_header([], Seen) ->
    %% Closing quote missing: accept what we have.
    lists:reverse(Seen);
unquote_header([$"], Seen) ->
    %% A quote as the very last character closes the value.
    lists:reverse(Seen);
unquote_header([$\\, Escaped | More], Seen) ->
    unquote_header(More, [Escaped | Seen]);
unquote_header([Char | More], Seen) ->
    unquote_header(More, [Char | Seen]).
%%% | |
%%% HERE BE TESTS AND TEST-RELATED FUNCTIONS | |
%%% | |
%% @doc Test helper: start a throw-away mochiweb socket server on a free
%% local port with ServerFun as its loop, connect a passive binary TCP client
%% to it and return ClientFun(ClientSocket).
%%
%% The client socket is closed and the server stopped in `after' sections, so
%% neither leaks when the connect or ClientFun fails. Earlier the client
%% socket was never closed and the server kept running if connecting failed.
%% A failure inside ClientFun now propagates with its original class, reason
%% and stack trace instead of being turned into an {'EXIT', _} return value
%% by an old-style catch.
with_socket_server(ServerFun, ClientFun) ->
    {ok, Server} = mochiweb_socket_server:start([{ip, "127.0.0.1"},
                                                 {port, 0},
                                                 {loop, ServerFun}]),
    try
        Port = mochiweb_socket_server:get(Server, port),
        {ok, Client} = gen_tcp:connect("127.0.0.1", Port,
                                       [binary, {active, false}]),
        try
            ClientFun(Client)
        after
            gen_tcp:close(Client)
        end
    after
        mochiweb_socket_server:stop(Server)
    end.
%% @doc Test helper: build a mochiweb request object for an HTTP/1.1
%% POST to "/multipart" on Socket, carrying only the given content-type and
%% content-length headers.
%% NOTE(review): parse_multipart_request/2 reads the request through wrq,
%% while this builds a mochiweb_request - confirm the two are compatible.
fake_request(Socket, ContentType, Length) ->
    Headers = mochiweb_headers:make([{"content-type", ContentType},
                                     {"content-length", Length}]),
    mochiweb_request:new(Socket, 'POST', "/multipart", {1,1}, Headers).
%% @doc Test helper: a parser callback that checks events against a script.
%% The received event must equal the head of the expected list, otherwise the
%% call fails with function_clause. Returns ok once the last expected event
%% has been consumed, and a fun awaiting the next event before that.
test_callback(Event, [Event]) ->
    ok;
test_callback(Event, [Event | Remaining]) ->
    fun (Next) -> test_callback(Next, Remaining) end.
%% @doc Parse a browser-style body (long dashed boundary) holding one hidden
%% field and one text file whose content contains bare "\n" line breaks, and
%% check the exact sequence of parser events.
test_parse3() ->
    ContentType = "multipart/form-data; boundary=---------------------------7386909285754635891697677882",
    BinContent = <<"-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test_file.txt\"\r\nContent-Type: text/plain\r\n\r\nWoo multiline text file\n\nLa la la\r\n-----------------------------7386909285754635891697677882--\r\n">>,
    HiddenPart = [{headers,
                   [{"content-disposition",
                     {"form-data", [{"name", "hidden"}]}}]},
                  {body, <<"multipart message">>},
                  body_end],
    FilePart = [{headers,
                 [{"content-disposition",
                   {"form-data", [{"name", "file"}, {"filename", "test_file.txt"}]}},
                  {"content-type", {"text/plain", []}}]},
                {body, <<"Woo multiline text file\n\nLa la la">>},
                body_end],
    Expect = HiddenPart ++ FilePart ++ [eof],
    TestCallback = fun (Event) -> test_callback(Event, Expect) end,
    %% Server side: push the whole body down the socket, then quit.
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    %% Client side: parse what arrives and require a clean finish.
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        {0, <<>>, ok} = parse_multipart_request(Req, TestCallback),
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.
%% @doc Parse a browser-style body holding one hidden field and a file input
%% that was left empty (empty filename, empty body), and check the exact
%% sequence of parser events.
test_parse2() ->
    ContentType = "multipart/form-data; boundary=---------------------------6072231407570234361599764024",
    BinContent = <<"-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\"\r\nContent-Type: application/octet-stream\r\n\r\n\r\n-----------------------------6072231407570234361599764024--\r\n">>,
    HiddenPart = [{headers,
                   [{"content-disposition",
                     {"form-data", [{"name", "hidden"}]}}]},
                  {body, <<"multipart message">>},
                  body_end],
    EmptyFilePart = [{headers,
                      [{"content-disposition",
                        {"form-data", [{"name", "file"}, {"filename", ""}]}},
                       {"content-type", {"application/octet-stream", []}}]},
                     {body, <<>>},
                     body_end],
    Expect = HiddenPart ++ EmptyFilePart ++ [eof],
    TestCallback = fun (Event) -> test_callback(Event, Expect) end,
    %% Server side: push the whole body down the socket, then quit.
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    %% Client side: parse what arrives and require a clean finish.
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        {0, <<>>, ok} = parse_multipart_request(Req, TestCallback),
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.
%% @doc End-to-end check of parse_form/1: a body with one plain field and one
%% text file must come back as a string value and a
%% {Filename, ContentType, Contents} tuple, in that order.
test_parse_form() ->
    ContentType = "multipart/form-data; boundary=AaB03x",
    "AaB03x" = get_boundary(ContentType),
    Lines = ["--AaB03x",
             "Content-Disposition: form-data; name=\"submit-name\"",
             "",
             "Larry",
             "--AaB03x",
             "Content-Disposition: form-data; name=\"files\";"
             ++ "filename=\"file1.txt\"",
             "Content-Type: text/plain",
             "",
             "... contents of file1.txt ...",
             "--AaB03x--",
             ""],
    BinContent = iolist_to_binary(mochiweb_util:join(Lines, "\r\n")),
    %% Server side: push the whole body down the socket, then quit.
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    %% Client side: parse the form and compare the resulting fields.
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        [{"submit-name", "Larry"},
                         {"files", {"file1.txt", {"text/plain",[]},
                                    <<"... contents of file1.txt ...">>}
                         }] = parse_form(Req),
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.
%% @doc Event-level check of parse_multipart_request/2 on a body with one
%% plain field and one text file: the callback must see headers, body,
%% body_end for each part and then eof, and parsing must consume everything.
test_parse() ->
    ContentType = "multipart/form-data; boundary=AaB03x",
    "AaB03x" = get_boundary(ContentType),
    Lines = ["--AaB03x",
             "Content-Disposition: form-data; name=\"submit-name\"",
             "",
             "Larry",
             "--AaB03x",
             "Content-Disposition: form-data; name=\"files\";"
             ++ "filename=\"file1.txt\"",
             "Content-Type: text/plain",
             "",
             "... contents of file1.txt ...",
             "--AaB03x--",
             ""],
    BinContent = iolist_to_binary(mochiweb_util:join(Lines, "\r\n")),
    FieldPart = [{headers,
                  [{"content-disposition",
                    {"form-data", [{"name", "submit-name"}]}}]},
                 {body, <<"Larry">>},
                 body_end],
    FilePart = [{headers,
                 [{"content-disposition",
                   {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}},
                  {"content-type", {"text/plain", []}}]},
                {body, <<"... contents of file1.txt ...">>},
                body_end],
    Expect = FieldPart ++ FilePart ++ [eof],
    TestCallback = fun (Event) -> test_callback(Event, Expect) end,
    %% Server side: push the whole body down the socket, then quit.
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    %% Client side: parse what arrives and require a clean finish.
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        {0, <<>>, ok} = parse_multipart_request(Req, TestCallback),
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.
%% @doc Unit checks for find_boundary/2: part and closing delimiters at the
%% start of and inside the data, a delimiter without its leading CRLF (not
%% found), and delimiters cut off by the end of the data ('maybe').
%%
%% 'maybe' is written quoted because the bare word is reserved when the
%% maybe_expr feature is enabled (default from OTP 27) and would not compile;
%% it is the same atom as before.
test_find_boundary() ->
    B = <<"\r\n--X">>,
    {next_boundary, 0, 7} = find_boundary(B, <<"\r\n--X\r\nRest">>),
    {next_boundary, 1, 7} = find_boundary(B, <<"!\r\n--X\r\nRest">>),
    {end_boundary, 0, 9} = find_boundary(B, <<"\r\n--X--\r\nRest">>),
    {end_boundary, 1, 9} = find_boundary(B, <<"!\r\n--X--\r\nRest">>),
    not_found = find_boundary(B, <<"--X\r\nRest">>),
    {'maybe', 0} = find_boundary(B, <<"\r\n--X\r">>),
    {'maybe', 1} = find_boundary(B, <<"!\r\n--X\r">>),
    %% Binary payload whose tail is only the beginning of a long delimiter.
    P = <<"\r\n-----------------------------16037454351082272548568224146">>,
    B0 = <<55,212,131,77,206,23,216,198,35,87,252,118,252,8,25,211,132,229,
           182,42,29,188,62,175,247,243,4,4,0,59, 13,10,45,45,45,45,45,45,45,
           45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,
           49,54,48,51,55,52,53,52,51,53,49>>,
    {'maybe', 30} = find_boundary(P, B0),
    ok.
%% @doc Unit checks for find_in_binary/2, written as a table of
%% {ExpectedResult, Needle, Haystack} rows covering exact, partial and
%% missing matches.
test_find_in_binary() ->
    Cases = [{{exact, 0}, <<"foo">>, <<"foobarbaz">>},
             {{exact, 1}, <<"oo">>, <<"foobarbaz">>},
             {{exact, 8}, <<"z">>, <<"foobarbaz">>},
             {not_found, <<"q">>, <<"foobarbaz">>},
             {{partial, 7, 2}, <<"azul">>, <<"foobarbaz">>},
             {{exact, 0}, <<"foobarbaz">>, <<"foobarbaz">>},
             {{partial, 0, 3}, <<"foobar">>, <<"foo">>},
             {{partial, 1, 3}, <<"foobar">>, <<"afoo">>}],
    lists:foreach(fun ({Expected, Needle, Haystack}) ->
                          %% Expected is already bound, so this is an assertion.
                          Expected = find_in_binary(Needle, Haystack)
                  end,
                  Cases),
    ok.
%% @doc Run every self-test of this module in a fixed order. Returns ok;
%% the first failing test aborts the run with its own error.
test() ->
    Suite = [fun test_find_in_binary/0,
             fun test_find_boundary/0,
             fun test_parse/0,
             fun test_parse2/0,
             fun test_parse3/0,
             fun test_parse_form/0],
    lists:foreach(fun (Test) -> Test() end, Suite),
    ok.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment