zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

cow_multipart.erl (26192B)


      1 %% Copyright (c) 2014-2018, Loïc Hoguin <essen@ninenines.eu>
      2 %%
      3 %% Permission to use, copy, modify, and/or distribute this software for any
      4 %% purpose with or without fee is hereby granted, provided that the above
      5 %% copyright notice and this permission notice appear in all copies.
      6 %%
      7 %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      8 %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      9 %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     10 %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     11 %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     12 %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     13 %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     14 
     15 -module(cow_multipart).
     16 
     17 %% Parsing.
     18 -export([parse_headers/2]).
     19 -export([parse_body/2]).
     20 
     21 %% Building.
     22 -export([boundary/0]).
     23 -export([first_part/2]).
     24 -export([part/2]).
     25 -export([close/1]).
     26 
     27 %% Headers.
     28 -export([form_data/1]).
     29 -export([parse_content_disposition/1]).
     30 -export([parse_content_transfer_encoding/1]).
     31 -export([parse_content_type/1]).
     32 
     33 -type headers() :: [{iodata(), iodata()}].
     34 -export_type([headers/0]).
     35 
     36 -include("cow_inline.hrl").
     37 
     38 -define(TEST1_MIME, <<
     39 	"This is a message with multiple parts in MIME format.\r\n"
     40 	"--frontier\r\n"
     41 	"Content-Type: text/plain\r\n"
     42 	"\r\n"
     43 	"This is the body of the message.\r\n"
     44 	"--frontier\r\n"
     45 	"Content-Type: application/octet-stream\r\n"
     46 	"Content-Transfer-Encoding: base64\r\n"
     47 	"\r\n"
     48 	"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
     49 	"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n"
     50 	"--frontier--"
     51 >>).
     52 -define(TEST1_BOUNDARY, <<"frontier">>).
     53 
     54 -define(TEST2_MIME, <<
     55 	"--AaB03x\r\n"
     56 	"Content-Disposition: form-data; name=\"submit-name\"\r\n"
     57 	"\r\n"
     58 	"Larry\r\n"
     59 	"--AaB03x\r\n"
     60 	"Content-Disposition: form-data; name=\"files\"\r\n"
     61 	"Content-Type: multipart/mixed; boundary=BbC04y\r\n"
     62 	"\r\n"
     63 	"--BbC04y\r\n"
     64 	"Content-Disposition: file; filename=\"file1.txt\"\r\n"
     65 	"Content-Type: text/plain\r\n"
     66 	"\r\n"
     67 	"... contents of file1.txt ...\r\n"
     68 	"--BbC04y\r\n"
     69 	"Content-Disposition: file; filename=\"file2.gif\"\r\n"
     70 	"Content-Type: image/gif\r\n"
     71 	"Content-Transfer-Encoding: binary\r\n"
     72 	"\r\n"
     73 	"...contents of file2.gif...\r\n"
     74 	"--BbC04y--\r\n"
     75 	"--AaB03x--"
     76 >>).
     77 -define(TEST2_BOUNDARY, <<"AaB03x">>).
     78 
     79 -define(TEST3_MIME, <<
     80 	"This is the preamble.\r\n"
     81 	"--boundary\r\n"
     82 	"Content-Type: text/plain\r\n"
     83 	"\r\n"
     84 	"This is the body of the message.\r\n"
     85 	"--boundary--"
     86 	"\r\nThis is the epilogue. Here it includes leading CRLF"
     87 >>).
     88 -define(TEST3_BOUNDARY, <<"boundary">>).
     89 
     90 -define(TEST4_MIME, <<
     91 	"This is the preamble.\r\n"
     92 	"--boundary\r\n"
     93 	"Content-Type: text/plain\r\n"
     94 	"\r\n"
     95 	"This is the body of the message.\r\n"
     96 	"--boundary--"
     97 	"\r\n"
     98 >>).
     99 -define(TEST4_BOUNDARY, <<"boundary">>).
    100 
    101 %% RFC 2046, Section 5.1.1
    102 -define(TEST5_MIME, <<
    103         "This is the preamble.  It is to be ignored, though it\r\n"
    104         "is a handy place for composition agents to include an\r\n"
    105         "explanatory note to non-MIME conformant readers.\r\n"
    106         "\r\n"
    107         "--simple boundary\r\n",
    108         "\r\n"
    109         "This is implicitly typed plain US-ASCII text.\r\n"
    110         "It does NOT end with a linebreak."
    111         "\r\n"
    112         "--simple boundary\r\n",
    113         "Content-type: text/plain; charset=us-ascii\r\n"
    114         "\r\n"
    115         "This is explicitly typed plain US-ASCII text.\r\n"
    116         "It DOES end with a linebreak.\r\n"
    117         "\r\n"
    118         "--simple boundary--\r\n"
    119         "\r\n"
    120         "This is the epilogue.  It is also to be ignored."
    121 >>).
    122 -define(TEST5_BOUNDARY, <<"simple boundary">>).
    123 
    124 %% Parsing.
    125 %%
    126 %% The multipart format is defined in RFC 2046, Section 5.1.
    127 
    128 %% @doc Parse the headers for the next multipart part.
    129 %%
    130 %% This function skips any preamble before the boundary.
    131 %% The preamble may be retrieved using parse_body/2.
    132 %%
    133 %% This function will accept input of any size, it is
    134 %% up to the caller to limit it if needed.
    135 
    136 -spec parse_headers(binary(), binary())
    137 	-> more | {more, binary()}
    138 	| {ok, headers(), binary()}
    139 	| {done, binary()}.
    140 %% If the stream starts with the boundary we can make a few assumptions
    141 %% and quickly figure out if we got the complete list of headers.
    142 parse_headers(<< "--", Stream/bits >>, Boundary) ->
    143 	BoundarySize = byte_size(Boundary),
    144 	case Stream of
    145 		%% Last boundary. Return the epilogue.
    146 		<< Boundary:BoundarySize/binary, "--", Stream2/bits >> ->
    147 			{done, Stream2};
    148 		<< Boundary:BoundarySize/binary, Stream2/bits >> ->
    149 			%% We have all the headers only if there is a \r\n\r\n
    150 			%% somewhere in the data after the boundary.
    151 			case binary:match(Stream2, <<"\r\n\r\n">>) of
    152 				nomatch ->
    153 					more;
    154 				_ ->
    155 					before_parse_headers(Stream2)
    156 			end;
    157 		%% If there isn't enough to represent Boundary \r\n\r\n
    158 		%% then we definitely don't have all the headers.
    159 		_ when byte_size(Stream) < byte_size(Boundary) + 4 ->
    160 			more;
    161 		%% Otherwise we have preamble data to skip.
    162 		%% We still got rid of the first two misleading bytes.
    163 		_ ->
    164 			skip_preamble(Stream, Boundary)
    165 	end;
    166 %% Otherwise we have preamble data to skip.
    167 parse_headers(Stream, Boundary) ->
    168 	skip_preamble(Stream, Boundary).
    169 
    170 %% We need to find the boundary and a \r\n\r\n after that.
    171 %% Since the boundary isn't at the start, it must be right
    172 %% after a \r\n too.
    173 skip_preamble(Stream, Boundary) ->
    174 	case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
    175 		%% No boundary, need more data.
    176 		nomatch ->
    177 			%% We can safely skip the size of the stream
    178 			%% minus the last 3 bytes which may be a partial boundary.
    179 			SkipSize = byte_size(Stream) - 3,
    180 			case SkipSize > 0 of
    181 				false ->
    182 					more;
    183 				true ->
    184 					<< _:SkipSize/binary, Stream2/bits >> = Stream,
    185 					{more, Stream2}
    186 			end;
    187 		{Start, Length} ->
    188 			Start2 = Start + Length,
    189 			<< _:Start2/binary, Stream2/bits >> = Stream,
    190 			case Stream2 of
    191 				%% Last boundary. Return the epilogue.
    192 				<< "--", Stream3/bits >> ->
    193 					{done, Stream3};
    194 				_ ->
    195 					case binary:match(Stream, <<"\r\n\r\n">>) of
    196 						%% We don't have the full headers.
    197 						nomatch ->
    198 							{more, Stream2};
    199 						_ ->
    200 							before_parse_headers(Stream2)
    201 					end
    202 			end
    203 	end.
    204 
%% Entry point of the header parser. The stream given is what
%% immediately follows the boundary.
%%
%% Returns {ok, [], Rest} when the part has no headers, otherwise
%% whatever parse_hd_name/3 returns. A stream that does not start
%% with \r\n matches no clause.
before_parse_headers(<< "\r\n\r\n", Stream/bits >>) ->
	%% This indicates that there are no headers, so we can abort immediately.
	{ok, [], Stream};
before_parse_headers(<< "\r\n", Stream/bits >>) ->
	%% There is a line break right after the boundary, skip it.
	parse_hd_name(Stream, [], <<>>).
    211 
%% Read a header name one byte at a time, up to the colon.
%%
%% H is the list of headers parsed so far and SoFar the name
%% being accumulated. Whitespace found before the colon is handed
%% over to parse_hd_name_ws/3.
%%
%% Any other byte goes through the ?LOWER macro, which comes from
%% cow_inline.hrl and is not visible in this file. Presumably it
%% appends the lowercased byte to SoFar and calls parse_hd_name/3
%% again (the tests in this file expect lowercase header names)
%% -- confirm against cow_inline.hrl.
parse_hd_name(<< C, Rest/bits >>, H, SoFar) ->
	case C of
		$: -> parse_hd_before_value(Rest, H, SoFar);
		$\s -> parse_hd_name_ws(Rest, H, SoFar);
		$\t -> parse_hd_name_ws(Rest, H, SoFar);
		_ -> ?LOWER(parse_hd_name, Rest, H, SoFar)
	end.
    219 
    220 parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
    221 	case C of
    222 		$\s -> parse_hd_name_ws(Rest, H, Name);
    223 		$\t -> parse_hd_name_ws(Rest, H, Name);
    224 		$: -> parse_hd_before_value(Rest, H, Name)
    225 	end.
    226 
    227 parse_hd_before_value(<< $\s, Rest/bits >>, H, N) ->
    228 	parse_hd_before_value(Rest, H, N);
    229 parse_hd_before_value(<< $\t, Rest/bits >>, H, N) ->
    230 	parse_hd_before_value(Rest, H, N);
    231 parse_hd_before_value(Buffer, H, N) ->
    232 	parse_hd_value(Buffer, H, N, <<>>).
    233 
    234 parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) ->
    235 	case Rest of
    236 		<< "\n\r\n", Rest2/bits >> ->
    237 			{ok, [{Name, SoFar}|Headers], Rest2};
    238 		<< $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t ->
    239 			parse_hd_value(Rest2, Headers, Name, SoFar);
    240 		<< $\n, Rest2/bits >> ->
    241 			parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>)
    242 	end;
    243 parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) ->
    244 	parse_hd_value(Rest, H, N, << SoFar/binary, C >>).
    245 
    246 %% @doc Parse the body of the current multipart part.
    247 %%
    248 %% The body is everything until the next boundary.
    249 
    250 -spec parse_body(binary(), binary())
    251 	-> {ok, binary()} | {ok, binary(), binary()}
    252 	| done | {done, binary()} | {done, binary(), binary()}.
    253 parse_body(Stream, Boundary) ->
    254 	BoundarySize = byte_size(Boundary),
    255 	case Stream of
    256 		<< "--", Boundary:BoundarySize/binary, _/bits >> ->
    257 			done;
    258 		_ ->
    259 			case binary:match(Stream, << "\r\n--", Boundary/bits >>) of
    260 				%% No boundary, check for a possible partial at the end.
    261 				%% Return more or less of the body depending on the result.
    262 				nomatch ->
    263 					StreamSize = byte_size(Stream),
    264 					From = StreamSize - BoundarySize - 3,
    265 					MatchOpts = if
    266 						%% Binary too small to contain boundary, check it fully.
    267 						From < 0 -> [];
    268 						%% Optimize, only check the end of the binary.
    269 						true -> [{scope, {From, StreamSize - From}}]
    270 					end,
    271 					case binary:match(Stream, <<"\r">>, MatchOpts) of
    272 						nomatch ->
    273 							{ok, Stream};
    274 						{Pos, _} ->
    275 							case Stream of
    276 								<< Body:Pos/binary >> ->
    277 									{ok, Body};
    278 								<< Body:Pos/binary, Rest/bits >> ->
    279 									{ok, Body, Rest}
    280 							end
    281 					end;
    282 				%% Boundary found, this is the last chunk of the body.
    283 				{Pos, _} ->
    284 					case Stream of
    285 						<< Body:Pos/binary, "\r\n" >> ->
    286 							{done, Body};
    287 						<< Body:Pos/binary, "\r\n", Rest/bits >> ->
    288 							{done, Body, Rest};
    289 						<< Body:Pos/binary, Rest/bits >> ->
    290 							{done, Body, Rest}
    291 					end
    292 			end
    293 	end.
    294 
    295 -ifdef(TEST).
    296 parse_test() ->
    297 	H1 = [{<<"content-type">>, <<"text/plain">>}],
    298 	Body1 = <<"This is the body of the message.">>,
    299 	H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
    300 		{<<"content-transfer-encoding">>, <<"base64">>}]),
    301 	Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
    302 		"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    303 	{ok, H1, Rest} = parse_headers(?TEST1_MIME, ?TEST1_BOUNDARY),
    304 	{done, Body1, Rest2} = parse_body(Rest, ?TEST1_BOUNDARY),
    305 	done = parse_body(Rest2, ?TEST1_BOUNDARY),
    306 	{ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST1_BOUNDARY),
    307 	H2 = lists:sort(H2Unsorted),
    308 	{done, Body2, Rest4} = parse_body(Rest3, ?TEST1_BOUNDARY),
    309 	done = parse_body(Rest4, ?TEST1_BOUNDARY),
    310 	{done, <<>>} = parse_headers(Rest4, ?TEST1_BOUNDARY),
    311 	ok.
    312 
    313 parse_interleaved_test() ->
    314 	H1 = [{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
    315 	Body1 = <<"Larry">>,
    316 	H2 = lists:sort([{<<"content-disposition">>, <<"form-data; name=\"files\"">>},
    317 		{<<"content-type">>, <<"multipart/mixed; boundary=BbC04y">>}]),
    318 	InH1 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file1.txt\"">>},
    319 		{<<"content-type">>, <<"text/plain">>}]),
    320 	InBody1 = <<"... contents of file1.txt ...">>,
    321 	InH2 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file2.gif\"">>},
    322 		{<<"content-type">>, <<"image/gif">>},
    323 		{<<"content-transfer-encoding">>, <<"binary">>}]),
    324 	InBody2 = <<"...contents of file2.gif...">>,
    325 	{ok, H1, Rest} = parse_headers(?TEST2_MIME, ?TEST2_BOUNDARY),
    326 	{done, Body1, Rest2} = parse_body(Rest, ?TEST2_BOUNDARY),
    327 	done = parse_body(Rest2, ?TEST2_BOUNDARY),
    328 	{ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST2_BOUNDARY),
    329 	H2 = lists:sort(H2Unsorted),
    330 	{_, ContentType} = lists:keyfind(<<"content-type">>, 1, H2),
    331 	{<<"multipart">>, <<"mixed">>, [{<<"boundary">>, InBoundary}]}
    332 		= parse_content_type(ContentType),
    333 	{ok, InH1Unsorted, InRest} = parse_headers(Rest3, InBoundary),
    334 	InH1 = lists:sort(InH1Unsorted),
    335 	{done, InBody1, InRest2} = parse_body(InRest, InBoundary),
    336 	done = parse_body(InRest2, InBoundary),
    337 	{ok, InH2Unsorted, InRest3} = parse_headers(InRest2, InBoundary),
    338 	InH2 = lists:sort(InH2Unsorted),
    339 	{done, InBody2, InRest4} = parse_body(InRest3, InBoundary),
    340 	done = parse_body(InRest4, InBoundary),
    341 	{done, Rest4} = parse_headers(InRest4, InBoundary),
    342 	{done, <<>>} = parse_headers(Rest4, ?TEST2_BOUNDARY),
    343 	ok.
    344 
    345 parse_epilogue_test() ->
    346 	H1 = [{<<"content-type">>, <<"text/plain">>}],
    347 	Body1 = <<"This is the body of the message.">>,
    348 	Epilogue = <<"\r\nThis is the epilogue. Here it includes leading CRLF">>,
    349 	{ok, H1, Rest} = parse_headers(?TEST3_MIME, ?TEST3_BOUNDARY),
    350 	{done, Body1, Rest2} = parse_body(Rest, ?TEST3_BOUNDARY),
    351 	done = parse_body(Rest2, ?TEST3_BOUNDARY),
    352 	{done, Epilogue} = parse_headers(Rest2, ?TEST3_BOUNDARY),
    353 	ok.
    354 
    355 parse_epilogue_crlf_test() ->
    356 	H1 = [{<<"content-type">>, <<"text/plain">>}],
    357 	Body1 = <<"This is the body of the message.">>,
    358 	Epilogue = <<"\r\n">>,
    359 	{ok, H1, Rest} = parse_headers(?TEST4_MIME, ?TEST4_BOUNDARY),
    360 	{done, Body1, Rest2} = parse_body(Rest, ?TEST4_BOUNDARY),
    361 	done = parse_body(Rest2, ?TEST4_BOUNDARY),
    362 	{done, Epilogue} = parse_headers(Rest2, ?TEST4_BOUNDARY),
    363 	ok.
    364 
    365 parse_rfc2046_test() ->
    366 	%% The following is an example included in RFC 2046, Section 5.1.1.
    367 	Body1 = <<"This is implicitly typed plain US-ASCII text.\r\n"
    368 		"It does NOT end with a linebreak.">>,
    369 	Body2 = <<"This is explicitly typed plain US-ASCII text.\r\n"
    370 		"It DOES end with a linebreak.\r\n">>,
    371 	H2 = [{<<"content-type">>, <<"text/plain; charset=us-ascii">>}],
    372 	Epilogue = <<"\r\n\r\nThis is the epilogue.  It is also to be ignored.">>,
    373 	{ok, [], Rest} = parse_headers(?TEST5_MIME, ?TEST5_BOUNDARY),
    374 	{done, Body1, Rest2} = parse_body(Rest, ?TEST5_BOUNDARY),
    375 	{ok, H2, Rest3} = parse_headers(Rest2, ?TEST5_BOUNDARY),
    376 	{done, Body2, Rest4} = parse_body(Rest3, ?TEST5_BOUNDARY),
    377 	{done, Epilogue} = parse_headers(Rest4, ?TEST5_BOUNDARY),
    378 	ok.
    379 
    380 parse_partial_test() ->
    381 	{ok, <<0:8000, "abcdef">>, <<"\rghij">>}
    382 		= parse_body(<<0:8000, "abcdef\rghij">>, <<"boundary">>),
    383 	{ok, <<"abcdef">>, <<"\rghij">>}
    384 		= parse_body(<<"abcdef\rghij">>, <<"boundary">>),
    385 	{ok, <<"abc">>, <<"\rdef">>}
    386 		= parse_body(<<"abc\rdef">>, <<"boundaryboundary">>),
    387 	{ok, <<0:8000, "abcdef">>, <<"\r\nghij">>}
    388 		= parse_body(<<0:8000, "abcdef\r\nghij">>, <<"boundary">>),
    389 	{ok, <<"abcdef">>, <<"\r\nghij">>}
    390 		= parse_body(<<"abcdef\r\nghij">>, <<"boundary">>),
    391 	{ok, <<"abc">>, <<"\r\ndef">>}
    392 		= parse_body(<<"abc\r\ndef">>, <<"boundaryboundary">>),
    393 	{ok, <<"boundary">>, <<"\r">>}
    394 		= parse_body(<<"boundary\r">>, <<"boundary">>),
    395 	{ok, <<"boundary">>, <<"\r\n">>}
    396 		= parse_body(<<"boundary\r\n">>, <<"boundary">>),
    397 	{ok, <<"boundary">>, <<"\r\n-">>}
    398 		= parse_body(<<"boundary\r\n-">>, <<"boundary">>),
    399 	{ok, <<"boundary">>, <<"\r\n--">>}
    400 		= parse_body(<<"boundary\r\n--">>, <<"boundary">>),
    401 	ok.
    402 
    403 perf_parse_multipart(Stream, Boundary) ->
    404 	case parse_headers(Stream, Boundary) of
    405 		{ok, _, Rest} ->
    406 			{_, _, Rest2} = parse_body(Rest, Boundary),
    407 			perf_parse_multipart(Rest2, Boundary);
    408 		{done, _} ->
    409 			ok
    410 	end.
    411 
    412 horse_parse() ->
    413 	horse:repeat(50000,
    414 		perf_parse_multipart(?TEST1_MIME, ?TEST1_BOUNDARY)
    415 	).
    416 -endif.
    417 
    418 %% Building.
    419 
%% @doc Generate a new random boundary.
%%
%% The boundary is built from 48 bytes returned by
%% crypto:strong_rand_bytes/1, passed to cow_base64url:encode/2
%% with padding disabled. The boundary generated therefore has
%% a low probability of ever appearing in the data.

-spec boundary() -> binary().
boundary() ->
	cow_base64url:encode(crypto:strong_rand_bytes(48), #{padding => false}).
    428 
%% @doc Return the first part's head.
%%
%% The head is made of "--", the boundary, a line break and the
%% headers followed by an empty line. This works exactly like the
%% part/2 function except there is no leading \r\n. Using this
%% function is not required, it only makes the output a little
%% smaller and prettier.

-spec first_part(binary(), headers()) -> iodata().
first_part(Boundary, Headers) ->
	[<<"--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])].
    438 
%% @doc Return a part's head.
%%
%% The head is made of a line break, "--", the boundary, another
%% line break and the headers followed by an empty line. The body
%% of the part is to be written right after it.

-spec part(binary(), headers()) -> iodata().
part(Boundary, Headers) ->
	[<<"\r\n--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])].
    444 
    445 headers_to_iolist([], Acc) ->
    446 	lists:reverse([<<"\r\n">>|Acc]);
    447 headers_to_iolist([{N, V}|Tail], Acc) ->
    448 	%% We don't want to create a sublist so we list the
    449 	%% values in reverse order so that it gets reversed properly.
    450 	headers_to_iolist(Tail, [<<"\r\n">>, V, <<": ">>, N|Acc]).
    451 
%% @doc Return the closing delimiter of the multipart message.
%%
%% The delimiter is made of a line break, "--", the boundary
%% and a final "--". No line break is added after it.

-spec close(binary()) -> iodata().
close(Boundary) ->
	[<<"\r\n--">>, Boundary, <<"--">>].
    457 
    458 -ifdef(TEST).
    459 build_test() ->
    460 	Result = string:to_lower(binary_to_list(?TEST1_MIME)),
    461 	Result = string:to_lower(binary_to_list(iolist_to_binary([
    462 		<<"This is a message with multiple parts in MIME format.\r\n">>,
    463 		first_part(?TEST1_BOUNDARY, [{<<"content-type">>, <<"text/plain">>}]),
    464 		<<"This is the body of the message.">>,
    465 		part(?TEST1_BOUNDARY, [
    466 			{<<"content-type">>, <<"application/octet-stream">>},
    467 			{<<"content-transfer-encoding">>, <<"base64">>}]),
    468 		<<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
    469 			"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    470 		close(?TEST1_BOUNDARY)
    471 	]))),
    472 	ok.
    473 
    474 identity_test() ->
    475 	B = boundary(),
    476 	Preamble = <<"This is a message with multiple parts in MIME format.">>,
    477 	H1 = [{<<"content-type">>, <<"text/plain">>}],
    478 	Body1 = <<"This is the body of the message.">>,
    479 	H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
    480 		{<<"content-transfer-encoding">>, <<"base64">>}]),
    481 	Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
    482 		"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    483 	Epilogue = <<"Gotta go fast!">>,
    484 	M = iolist_to_binary([
    485 		Preamble,
    486 		part(B, H1), Body1,
    487 		part(B, H2), Body2,
    488 		close(B),
    489 		Epilogue
    490 	]),
    491 	{done, Preamble, M2} = parse_body(M, B),
    492 	{ok, H1, M3} = parse_headers(M2, B),
    493 	{done, Body1, M4} = parse_body(M3, B),
    494 	{ok, H2Unsorted, M5} = parse_headers(M4, B),
    495 	H2 = lists:sort(H2Unsorted),
    496 	{done, Body2, M6} = parse_body(M5, B),
    497 	{done, Epilogue} = parse_headers(M6, B),
    498 	ok.
    499 
    500 perf_build_multipart() ->
    501 	B = boundary(),
    502 	[
    503 		<<"preamble\r\n">>,
    504 		first_part(B, [{<<"content-type">>, <<"text/plain">>}]),
    505 		<<"This is the body of the message.">>,
    506 		part(B, [
    507 			{<<"content-type">>, <<"application/octet-stream">>},
    508 			{<<"content-transfer-encoding">>, <<"base64">>}]),
    509 		<<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
    510 			"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    511 		close(B),
    512 		<<"epilogue">>
    513 	].
    514 
    515 horse_build() ->
    516 	horse:repeat(50000,
    517 		perf_build_multipart()
    518 	).
    519 -endif.
    520 
    521 %% Headers.
    522 
    523 %% @doc Convenience function for extracting information from headers
    524 %% when parsing a multipart/form-data stream.
    525 
    526 -spec form_data(headers() | #{binary() => binary()})
    527 	-> {data, binary()}
    528 	| {file, binary(), binary(), binary()}.
    529 form_data(Headers) when is_map(Headers) ->
    530 	form_data(maps:to_list(Headers));
    531 form_data(Headers) ->
    532 	{_, DispositionBin} = lists:keyfind(<<"content-disposition">>, 1, Headers),
    533 	{<<"form-data">>, Params} = parse_content_disposition(DispositionBin),
    534 	{_, FieldName} = lists:keyfind(<<"name">>, 1, Params),
    535 	case lists:keyfind(<<"filename">>, 1, Params) of
    536 		false ->
    537 			{data, FieldName};
    538 		{_, Filename} ->
    539 			Type = case lists:keyfind(<<"content-type">>, 1, Headers) of
    540 				false -> <<"text/plain">>;
    541 				{_, T} -> T
    542 			end,
    543 			{file, FieldName, Filename, Type}
    544 	end.
    545 
    546 -ifdef(TEST).
    547 form_data_test_() ->
    548 	Tests = [
    549 		{[{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
    550 			{data, <<"submit-name">>}},
    551 		{[{<<"content-disposition">>,
    552 				<<"form-data; name=\"files\"; filename=\"file1.txt\"">>},
    553 			{<<"content-type">>, <<"text/x-plain">>}],
    554 			{file, <<"files">>, <<"file1.txt">>, <<"text/x-plain">>}}
    555 	],
    556 	[{lists:flatten(io_lib:format("~p", [V])),
    557 		fun() -> R = form_data(V) end} || {V, R} <- Tests].
    558 -endif.
    559 
    560 %% @todo parse_content_description
    561 %% @todo parse_content_id
    562 
%% @doc Parse an RFC 2183 content-disposition value.
%%
%% Returns the disposition type along with the list of its
%% parameters, in order of appearance. The tests in this file
%% expect the type and the parameter names to be lowercase.
%% @todo Support RFC 2231.

-spec parse_content_disposition(binary())
	-> {binary(), [{binary(), binary()}]}.
parse_content_disposition(Bin) ->
	parse_cd_type(Bin, <<>>).

%% Read the disposition type up to the end of the value or up to
%% the first semicolon, space or tab, after which the parameters
%% are parsed. Other bytes go through the ?LOWER macro from
%% cow_inline.hrl (not visible here; presumably it appends the
%% lowercased byte to Acc and recurses -- confirm).
parse_cd_type(<<>>, Acc) ->
	{Acc, []};
parse_cd_type(<< C, Rest/bits >>, Acc) ->
	case C of
		$; -> {Acc, parse_before_param(Rest, [])};
		$\s -> {Acc, parse_before_param(Rest, [])};
		$\t -> {Acc, parse_before_param(Rest, [])};
		_ -> ?LOWER(parse_cd_type, Rest, Acc)
	end.
    580 
    581 -ifdef(TEST).
    582 parse_content_disposition_test_() ->
    583 	Tests = [
    584 		{<<"inline">>, {<<"inline">>, []}},
    585 		{<<"attachment">>, {<<"attachment">>, []}},
    586 		{<<"attachment; filename=genome.jpeg;"
    587 			"  modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>,
    588 			{<<"attachment">>, [
    589 				{<<"filename">>, <<"genome.jpeg">>},
    590 				{<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}
    591 			]}},
    592 		{<<"form-data; name=\"user\"">>,
    593 			{<<"form-data">>, [{<<"name">>, <<"user">>}]}},
    594 		{<<"form-data; NAME=\"submit-name\"">>,
    595 			{<<"form-data">>, [{<<"name">>, <<"submit-name">>}]}},
    596 		{<<"form-data; name=\"files\"; filename=\"file1.txt\"">>,
    597 			{<<"form-data">>, [
    598 				{<<"name">>, <<"files">>},
    599 				{<<"filename">>, <<"file1.txt">>}
    600 			]}},
    601 		{<<"file; filename=\"file1.txt\"">>,
    602 			{<<"file">>, [{<<"filename">>, <<"file1.txt">>}]}},
    603 		{<<"file; filename=\"file2.gif\"">>,
    604 			{<<"file">>, [{<<"filename">>, <<"file2.gif">>}]}}
    605 	],
    606 	[{V, fun() -> R = parse_content_disposition(V) end} || {V, R} <- Tests].
    607 
    608 horse_parse_content_disposition_attachment() ->
    609 	horse:repeat(100000,
    610 		parse_content_disposition(<<"attachment; filename=genome.jpeg;"
    611 			"  modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)
    612 	).
    613 
    614 horse_parse_content_disposition_form_data() ->
    615 	horse:repeat(100000,
    616 		parse_content_disposition(
    617 			<<"form-data; name=\"files\"; filename=\"file1.txt\"">>)
    618 	).
    619 
    620 horse_parse_content_disposition_inline() ->
    621 	horse:repeat(100000,
    622 		parse_content_disposition(<<"inline">>)
    623 	).
    624 -endif.
    625 
%% @doc Parse an RFC 2045 content-transfer-encoding header.
%%
%% The value is passed through the one-argument ?LOWER macro from
%% cow_inline.hrl (not visible here). The tests in this file
%% expect the result to be the value in lowercase.

-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
	?LOWER(Bin).
    631 
    632 -ifdef(TEST).
    633 parse_content_transfer_encoding_test_() ->
    634 	Tests = [
    635 		{<<"7bit">>, <<"7bit">>},
    636 		{<<"7BIT">>, <<"7bit">>},
    637 		{<<"8bit">>, <<"8bit">>},
    638 		{<<"binary">>, <<"binary">>},
    639 		{<<"quoted-printable">>, <<"quoted-printable">>},
    640 		{<<"base64">>, <<"base64">>},
    641 		{<<"Base64">>, <<"base64">>},
    642 		{<<"BASE64">>, <<"base64">>},
    643 		{<<"bAsE64">>, <<"base64">>}
    644 	],
    645 	[{V, fun() -> R = parse_content_transfer_encoding(V) end}
    646 		|| {V, R} <- Tests].
    647 
    648 horse_parse_content_transfer_encoding() ->
    649 	horse:repeat(100000,
    650 		parse_content_transfer_encoding(<<"QUOTED-PRINTABLE">>)
    651 	).
    652 -endif.
    653 
%% @doc Parse an RFC 2045 content-type header.
%%
%% Returns the type, the subtype and the list of parameters in
%% order of appearance. The tests in this file expect the
%% parameter names to be lowercase.

-spec parse_content_type(binary())
	-> {binary(), binary(), [{binary(), binary()}]}.
parse_content_type(Bin) ->
	parse_ct_type(Bin, <<>>).

%% Read the type up to the slash. There is no clause for the
%% empty binary: a value without a slash matches no clause.
%% Other bytes go through the ?LOWER macro from cow_inline.hrl
%% (not visible here; presumably it appends the lowercased byte
%% to Acc and recurses -- confirm).
parse_ct_type(<< C, Rest/bits >>, Acc) ->
	case C of
		$/ -> parse_ct_subtype(Rest, Acc, <<>>);
		_ -> ?LOWER(parse_ct_type, Rest, Acc)
	end.

%% Read the subtype up to the end of the value or up to the first
%% semicolon, space or tab, after which the parameters are parsed.
%% The end of the value is only accepted when the subtype is not
%% empty.
parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
	{Type, Subtype, []};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
	case C of
		$; -> {Type, Acc, parse_before_param(Rest, [])};
		$\s -> {Type, Acc, parse_before_param(Rest, [])};
		$\t -> {Type, Acc, parse_before_param(Rest, [])};
		_ -> ?LOWER(parse_ct_subtype, Rest, Type, Acc)
	end.
    676 
    677 -ifdef(TEST).
    678 parse_content_type_test_() ->
    679 	Tests = [
    680 		{<<"image/gif">>,
    681 			{<<"image">>, <<"gif">>, []}},
    682 		{<<"text/plain">>,
    683 			{<<"text">>, <<"plain">>, []}},
    684 		{<<"text/plain; charset=us-ascii">>,
    685 			{<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
    686 		{<<"text/plain; charset=\"us-ascii\"">>,
    687 			{<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
    688 		{<<"multipart/form-data; boundary=AaB03x">>,
    689 			{<<"multipart">>, <<"form-data">>,
    690 				[{<<"boundary">>, <<"AaB03x">>}]}},
    691 		{<<"multipart/mixed; boundary=BbC04y">>,
    692 			{<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"BbC04y">>}]}},
    693 		{<<"multipart/mixed; boundary=--------">>,
    694 			{<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"--------">>}]}},
    695 		{<<"application/x-horse; filename=genome.jpeg;"
    696 				"  some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
    697 				"  charset=us-ascii; empty=; number=12345">>,
    698 			{<<"application">>, <<"x-horse">>, [
    699 				{<<"filename">>, <<"genome.jpeg">>},
    700 				{<<"some-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>},
    701 				{<<"charset">>, <<"us-ascii">>},
    702 				{<<"empty">>, <<>>},
    703 				{<<"number">>, <<"12345">>}
    704 			]}}
    705 	],
    706 	[{V, fun() -> R = parse_content_type(V) end}
    707 		|| {V, R} <- Tests].
    708 
    709 horse_parse_content_type_zero() ->
    710 	horse:repeat(100000,
    711 		parse_content_type(<<"text/plain">>)
    712 	).
    713 
    714 horse_parse_content_type_one() ->
    715 	horse:repeat(100000,
    716 		parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)
    717 	).
    718 
    719 horse_parse_content_type_five() ->
    720 	horse:repeat(100000,
    721 		parse_content_type(<<"application/x-horse; filename=genome.jpeg;"
    722 			"  some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
    723 			"  charset=us-ascii; empty=; number=12345">>)
    724 	).
    725 -endif.
    726 
%% @doc Parse RFC 2045 parameters.
%%
%% Params holds the parameters parsed so far, most recent first;
%% it is reversed when the end of the value is reached. Semicolons,
%% spaces and tabs found before a parameter name are skipped.
%% The first byte of the name goes through the ?LOWER macro from
%% cow_inline.hrl (not visible here; presumably it calls
%% parse_param_name/3 with the lowercased byte as the name so
%% far -- confirm).

parse_before_param(<<>>, Params) ->
	lists:reverse(Params);
parse_before_param(<< C, Rest/bits >>, Params) ->
	case C of
		$; -> parse_before_param(Rest, Params);
		$\s -> parse_before_param(Rest, Params);
		$\t -> parse_before_param(Rest, Params);
		_ -> ?LOWER(parse_param_name, Rest, Params, <<>>)
	end.
    738 
%% Read a parameter name up to the equal sign.
%%
%% A name that runs to the end of the input is stored with an
%% empty value. Other bytes go through the ?LOWER macro from
%% cow_inline.hrl (not visible here; presumably it appends the
%% lowercased byte to Acc and recurses -- confirm).
parse_param_name(<<>>, Params, Acc) ->
	lists:reverse([{Acc, <<>>}|Params]);
parse_param_name(<< C, Rest/bits >>, Params, Acc) ->
	case C of
		$= -> parse_param_value(Rest, Params, Acc);
		_ -> ?LOWER(parse_param_name, Rest, Params, Acc)
	end.
    746 
    747 parse_param_value(<<>>, Params, Name) ->
    748 	lists:reverse([{Name, <<>>}|Params]);
    749 parse_param_value(<< C, Rest/bits >>, Params, Name) ->
    750 	case C of
    751 		$" -> parse_param_quoted_value(Rest, Params, Name, <<>>);
    752 		$; -> parse_before_param(Rest, [{Name, <<>>}|Params]);
    753 		$\s -> parse_before_param(Rest, [{Name, <<>>}|Params]);
    754 		$\t -> parse_before_param(Rest, [{Name, <<>>}|Params]);
    755 		C -> parse_param_value(Rest, Params, Name, << C >>)
    756 	end.
    757 
    758 parse_param_value(<<>>, Params, Name, Acc) ->
    759 	lists:reverse([{Name, Acc}|Params]);
    760 parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) ->
    761 	case C of
    762 		$; -> parse_before_param(Rest, [{Name, Acc}|Params]);
    763 		$\s -> parse_before_param(Rest, [{Name, Acc}|Params]);
    764 		$\t -> parse_before_param(Rest, [{Name, Acc}|Params]);
    765 		C -> parse_param_value(Rest, Params, Name, << Acc/binary, C >>)
    766 	end.
    767 
%% Read a quoted parameter value up to the closing double quote.
%%
%% A backslash escapes the byte that follows it: that byte is
%% stored as is and the backslash is dropped. Clause order
%% matters, the escape must be checked before the closing quote.
%%
%% We expect a final $" so no need to test for <<>>: a value with
%% no closing quote matches no clause, and neither does an
%% unescaped \r inside the quotes.
parse_param_quoted_value(<< $\\, C, Rest/bits >>, Params, Name, Acc) ->
	parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>);
parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Acc) ->
	parse_before_param(Rest, [{Name, Acc}|Params]);
parse_param_quoted_value(<< C, Rest/bits >>, Params, Name, Acc)
		when C =/= $\r ->
	parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>).