cow_multipart.erl (26192B)
1 %% Copyright (c) 2014-2018, Loïc Hoguin <essen@ninenines.eu> 2 %% 3 %% Permission to use, copy, modify, and/or distribute this software for any 4 %% purpose with or without fee is hereby granted, provided that the above 5 %% copyright notice and this permission notice appear in all copies. 6 %% 7 %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 15 -module(cow_multipart). 16 17 %% Parsing. 18 -export([parse_headers/2]). 19 -export([parse_body/2]). 20 21 %% Building. 22 -export([boundary/0]). 23 -export([first_part/2]). 24 -export([part/2]). 25 -export([close/1]). 26 27 %% Headers. 28 -export([form_data/1]). 29 -export([parse_content_disposition/1]). 30 -export([parse_content_transfer_encoding/1]). 31 -export([parse_content_type/1]). 32 33 -type headers() :: [{iodata(), iodata()}]. 34 -export_type([headers/0]). 35 36 -include("cow_inline.hrl"). 37 38 -define(TEST1_MIME, << 39 "This is a message with multiple parts in MIME format.\r\n" 40 "--frontier\r\n" 41 "Content-Type: text/plain\r\n" 42 "\r\n" 43 "This is the body of the message.\r\n" 44 "--frontier\r\n" 45 "Content-Type: application/octet-stream\r\n" 46 "Content-Transfer-Encoding: base64\r\n" 47 "\r\n" 48 "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" 49 "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n" 50 "--frontier--" 51 >>). 52 -define(TEST1_BOUNDARY, <<"frontier">>). 

%% Test fixture: a form-data message whose second part is itself a
%% multipart/mixed body delimited by the nested boundary "BbC04y".
-define(TEST2_MIME, <<
    "--AaB03x\r\n"
    "Content-Disposition: form-data; name=\"submit-name\"\r\n"
    "\r\n"
    "Larry\r\n"
    "--AaB03x\r\n"
    "Content-Disposition: form-data; name=\"files\"\r\n"
    "Content-Type: multipart/mixed; boundary=BbC04y\r\n"
    "\r\n"
    "--BbC04y\r\n"
    "Content-Disposition: file; filename=\"file1.txt\"\r\n"
    "Content-Type: text/plain\r\n"
    "\r\n"
    "... contents of file1.txt ...\r\n"
    "--BbC04y\r\n"
    "Content-Disposition: file; filename=\"file2.gif\"\r\n"
    "Content-Type: image/gif\r\n"
    "Content-Transfer-Encoding: binary\r\n"
    "\r\n"
    "...contents of file2.gif...\r\n"
    "--BbC04y--\r\n"
    "--AaB03x--"
>>).
%% Outer boundary used by ?TEST2_MIME.
-define(TEST2_BOUNDARY, <<"AaB03x">>).

%% Test fixture: one part surrounded by a preamble and by an epilogue
%% that begins with a CRLF right after the closing delimiter.
-define(TEST3_MIME, <<
    "This is the preamble.\r\n"
    "--boundary\r\n"
    "Content-Type: text/plain\r\n"
    "\r\n"
    "This is the body of the message.\r\n"
    "--boundary--"
    "\r\nThis is the epilogue. Here it includes leading CRLF"
>>).
-define(TEST3_BOUNDARY, <<"boundary">>).

%% Test fixture: same message as ?TEST3_MIME, but the epilogue is
%% nothing more than a CRLF.
-define(TEST4_MIME, <<
    "This is the preamble.\r\n"
    "--boundary\r\n"
    "Content-Type: text/plain\r\n"
    "\r\n"
    "This is the body of the message.\r\n"
    "--boundary--"
    "\r\n"
>>).
-define(TEST4_BOUNDARY, <<"boundary">>).

%% RFC 2046, Section 5.1.1
%%
%% The first part has no headers at all and the boundary contains
%% a space character.
-define(TEST5_MIME, <<
    "This is the preamble. It is to be ignored, though it\r\n"
    "is a handy place for composition agents to include an\r\n"
    "explanatory note to non-MIME conformant readers.\r\n"
    "\r\n"
    "--simple boundary\r\n",
    "\r\n"
    "This is implicitly typed plain US-ASCII text.\r\n"
    "It does NOT end with a linebreak."
    "\r\n"
    "--simple boundary\r\n",
    "Content-type: text/plain; charset=us-ascii\r\n"
    "\r\n"
    "This is explicitly typed plain US-ASCII text.\r\n"
    "It DOES end with a linebreak.\r\n"
    "\r\n"
    "--simple boundary--\r\n"
    "\r\n"
    "This is the epilogue. It is also to be ignored."
>>).
-define(TEST5_BOUNDARY, <<"simple boundary">>).

%% Parsing.
%%
%% The multipart format is defined in RFC 2046.

%% @doc Parse the headers of the next part of a multipart stream.
%%
%% Any preamble located before the boundary is skipped. Use
%% parse_body/2 instead when the preamble must be retrieved.
%%
%% The atom `more' is returned when the stream begins with the
%% delimiter but the blank line ending the headers has not been
%% received yet. `{done, Epilogue}' is returned when the closing
%% delimiter is found. Otherwise the result is either the parsed
%% headers or whatever skip_preamble/2 returns.
%%
%% No limit is applied to the size of the input. The caller
%% must enforce one if that is a concern.

-spec parse_headers(binary(), binary())
    -> more | {more, binary()}
    | {ok, headers(), binary()}
    | {done, binary()}.
%% A stream starting with two dashes may start with the delimiter,
%% in which case no search is required to locate it.
parse_headers(<< "--", AfterDashes/bits >>, Boundary) ->
    parse_headers_after_dashes(AfterDashes, Boundary);
%% Anything else is preamble data that has to be skipped.
parse_headers(Stream, Boundary) ->
    skip_preamble(Stream, Boundary).

%% Decide what follows the two leading dashes of the stream.
parse_headers_after_dashes(AfterDashes, Boundary) ->
    Size = byte_size(Boundary),
    case AfterDashes of
        %% Closing delimiter. What follows is the epilogue.
        << Boundary:Size/binary, "--", Epilogue/bits >> ->
            {done, Epilogue};
        %% Regular delimiter. The headers are complete only if
        %% a \r\n\r\n can be found somewhere after the boundary.
        << Boundary:Size/binary, AfterBoundary/bits >> ->
            case binary:match(AfterBoundary, <<"\r\n\r\n">>) of
                nomatch ->
                    more;
                {_, _} ->
                    before_parse_headers(AfterBoundary)
            end;
        %% Too short to contain Boundary \r\n\r\n: the headers
        %% cannot all be there yet.
        _ when byte_size(AfterDashes) < Size + 4 ->
            more;
        %% The two dashes were misleading preamble bytes. They are
        %% dropped and the rest of the preamble is skipped.
        _ ->
            skip_preamble(AfterDashes, Boundary)
    end.

%% We need to find the boundary and a \r\n\r\n after that.
%% Since the boundary isn't at the start, it must be right
%% after a \r\n too.
%% Skip the preamble and parse the headers that follow the delimiter.
%%
%% Returns `more' or `{more, Rest}' when the delimiter or the end of
%% the headers is missing, `{done, Epilogue}' when the delimiter found
%% is the closing one, and the result of the header parser otherwise.
skip_preamble(Stream, Boundary) ->
    case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
        %% No delimiter, need more data.
        nomatch ->
            %% The delimiter is byte_size(Boundary) + 4 bytes long, so
            %% the stream may end with up to byte_size(Boundary) + 3
            %% bytes of it. Those bytes must be kept; everything
            %% before them is preamble that can be dropped.
            SkipSize = byte_size(Stream) - byte_size(Boundary) - 3,
            case SkipSize > 0 of
                false ->
                    more;
                true ->
                    << _:SkipSize/binary, Tail/bits >> = Stream,
                    {more, Tail}
            end;
        {Start, Length} ->
            AfterSize = Start + Length,
            << _:AfterSize/binary, AfterBoundary/bits >> = Stream,
            case AfterBoundary of
                %% Last boundary. Return the epilogue.
                << "--", Epilogue/bits >> ->
                    {done, Epilogue};
                _ ->
                    %% Only the data after the boundary is searched:
                    %% a \r\n\r\n inside the preamble says nothing
                    %% about the headers being complete.
                    case binary:match(AfterBoundary, <<"\r\n\r\n">>) of
                        %% We don't have the full headers. Drop the
                        %% preamble and the \r\n but keep the delimiter,
                        %% so that parsing can resume from it once more
                        %% data has been appended.
                        nomatch ->
                            DelimiterStart = Start + 2,
                            << _:DelimiterStart/binary, FromDelimiter/bits >> = Stream,
                            {more, FromDelimiter};
                        _ ->
                            before_parse_headers(AfterBoundary)
                    end
            end
    end.

%% Consume the line break that follows the boundary, then parse
%% the headers if there are any.
before_parse_headers(<< "\r\n\r\n", Stream/bits >>) ->
    %% This indicates that there are no headers, so we can abort immediately.
    {ok, [], Stream};
before_parse_headers(<< "\r\n", Stream/bits >>) ->
    %% There is a line break right after the boundary, skip it.
    parse_hd_name(Stream, [], <<>>).

%% Accumulate a header name until the colon. Whitespace is allowed
%% between the name and the colon.
%%
%% The ?LOWER macro comes from cow_inline.hrl, which is not visible
%% here. It presumably appends the lowercased C to SoFar before
%% calling parse_hd_name again (the tests expect lowercase names).
parse_hd_name(<< C, Rest/bits >>, H, SoFar) ->
    case C of
        $: -> parse_hd_before_value(Rest, H, SoFar);
        $\s -> parse_hd_name_ws(Rest, H, SoFar);
        $\t -> parse_hd_name_ws(Rest, H, SoFar);
        _ -> ?LOWER(parse_hd_name, Rest, H, SoFar)
    end.

%% Skip the whitespace found between a header name and its colon.
%% Any other character results in a case_clause error.
parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
    case C of
        $\s -> parse_hd_name_ws(Rest, H, Name);
        $\t -> parse_hd_name_ws(Rest, H, Name);
        $: -> parse_hd_before_value(Rest, H, Name)
    end.

%% Skip the whitespace found between the colon and the header value.
parse_hd_before_value(<< $\s, Rest/bits >>, H, N) ->
    parse_hd_before_value(Rest, H, N);
parse_hd_before_value(<< $\t, Rest/bits >>, H, N) ->
    parse_hd_before_value(Rest, H, N);
parse_hd_before_value(Buffer, H, N) ->
    parse_hd_value(Buffer, H, N, <<>>).

%% Accumulate a header value until the end of the line.
%%
%% The headers are returned in the reverse order of appearance.
parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) ->
    case Rest of
        %% Empty line: this was the last header.
        << "\n\r\n", Rest2/bits >> ->
            {ok, [{Name, SoFar}|Headers], Rest2};
        %% Folded header: the value continues on the next line.
        %% Unfolding removes the line break only. The whitespace
        %% character that follows it is part of the value and is
        %% kept so that the words around the fold stay separated.
        << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t ->
            parse_hd_value(Rest2, Headers, Name, << SoFar/binary, C >>);
        %% End of this header, another one follows.
        << $\n, Rest2/bits >> ->
            parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>)
    end;
parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) ->
    parse_hd_value(Rest, H, N, << SoFar/binary, C >>).

%% @doc Parse the body of the current multipart part.
%%
%% The body is everything until the next boundary.
%%
%% The atom `done' is returned when the stream starts with the
%% delimiter. `{done, Body, Rest}' is returned when the delimiter
%% is found further in the stream; Rest then starts with the
%% delimiter, without the line break that was preceding it.
%% `{ok, Body}' and `{ok, Body, Rest}' are returned when no
%% delimiter was found; Rest then starts with a \r that may
%% be the beginning of a delimiter.

-spec parse_body(binary(), binary())
    -> {ok, binary()} | {ok, binary(), binary()}
    | done | {done, binary()} | {done, binary(), binary()}.
parse_body(Stream, Boundary) ->
    BoundarySize = byte_size(Boundary),
    case Stream of
        << "--", Boundary:BoundarySize/binary, _/bits >> ->
            done;
        _ ->
            case binary:match(Stream, << "\r\n--", Boundary/bits >>) of
                %% No boundary, check for a possible partial at the end.
                nomatch ->
                    parse_body_no_boundary(Stream, BoundarySize);
                %% Boundary found, this is the last chunk of the body.
                %% The match guarantees that \r\n and the delimiter
                %% are found at Pos, therefore Rest is never empty.
                {Pos, _} ->
                    << Body:Pos/binary, "\r\n", Rest/bits >> = Stream,
                    {done, Body, Rest}
            end
    end.

%% Return more or less of the body depending on whether a \r that
%% might start a delimiter is found near the end of the stream.
parse_body_no_boundary(Stream, BoundarySize) ->
    StreamSize = byte_size(Stream),
    From = StreamSize - BoundarySize - 3,
    MatchOpts = if
        %% Binary too small to contain boundary, check it fully.
        From < 0 -> [];
        %% Optimize, only check the end of the binary.
        true -> [{scope, {From, StreamSize - From}}]
    end,
    case binary:match(Stream, <<"\r">>, MatchOpts) of
        nomatch ->
            {ok, Stream};
        %% The \r found at Pos is always part of Rest.
        {Pos, _} ->
            << Body:Pos/binary, Rest/bits >> = Stream,
            {ok, Body, Rest}
    end.

-ifdef(TEST).
%% Parse both parts of ?TEST1_MIME then reach the closing delimiter.
parse_test() ->
    TextHeaders = [{<<"content-type">>, <<"text/plain">>}],
    TextBody = <<"This is the body of the message.">>,
    %% Headers are compared sorted because the parser does not
    %% return them in the order they appear in the message.
    Base64Headers = lists:sort([
        {<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    Base64Body = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
        "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    %% First part.
    {ok, TextHeaders, Part1Body} = parse_headers(?TEST1_MIME, ?TEST1_BOUNDARY),
    {done, TextBody, Part2} = parse_body(Part1Body, ?TEST1_BOUNDARY),
    done = parse_body(Part2, ?TEST1_BOUNDARY),
    %% Second part.
    {ok, Part2Headers, Part2Body} = parse_headers(Part2, ?TEST1_BOUNDARY),
    Base64Headers = lists:sort(Part2Headers),
    {done, Base64Body, Closing} = parse_body(Part2Body, ?TEST1_BOUNDARY),
    done = parse_body(Closing, ?TEST1_BOUNDARY),
    %% Closing delimiter, no epilogue.
    {done, <<>>} = parse_headers(Closing, ?TEST1_BOUNDARY),
    ok.

%% Parse ?TEST2_MIME, including the multipart/mixed body nested
%% inside its second part.
parse_interleaved_test() ->
    FieldHeaders = [{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
    FieldBody = <<"Larry">>,
    FilesHeaders = lists:sort([
        {<<"content-disposition">>, <<"form-data; name=\"files\"">>},
        {<<"content-type">>, <<"multipart/mixed; boundary=BbC04y">>}]),
    File1Headers = lists:sort([
        {<<"content-disposition">>, <<"file; filename=\"file1.txt\"">>},
        {<<"content-type">>, <<"text/plain">>}]),
    File1Body = <<"... contents of file1.txt ...">>,
    File2Headers = lists:sort([
        {<<"content-disposition">>, <<"file; filename=\"file2.gif\"">>},
        {<<"content-type">>, <<"image/gif">>},
        {<<"content-transfer-encoding">>, <<"binary">>}]),
    File2Body = <<"...contents of file2.gif...">>,
    %% First outer part.
    {ok, FieldHeaders, Outer1} = parse_headers(?TEST2_MIME, ?TEST2_BOUNDARY),
    {done, FieldBody, Outer2} = parse_body(Outer1, ?TEST2_BOUNDARY),
    done = parse_body(Outer2, ?TEST2_BOUNDARY),
    %% Second outer part: its content-type provides the inner boundary.
    {ok, OuterUnsorted, Outer3} = parse_headers(Outer2, ?TEST2_BOUNDARY),
    FilesHeaders = lists:sort(OuterUnsorted),
    {_, ContentType} = lists:keyfind(<<"content-type">>, 1, FilesHeaders),
    {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, InnerBoundary}]}
        = parse_content_type(ContentType),
    %% First inner part.
    {ok, Inner1Unsorted, Inner1} = parse_headers(Outer3, InnerBoundary),
    File1Headers = lists:sort(Inner1Unsorted),
    {done, File1Body, Inner2} = parse_body(Inner1, InnerBoundary),
    done = parse_body(Inner2, InnerBoundary),
    %% Second inner part.
    {ok, Inner2Unsorted, Inner3} = parse_headers(Inner2, InnerBoundary),
    File2Headers = lists:sort(Inner2Unsorted),
    {done, File2Body, Inner4} = parse_body(Inner3, InnerBoundary),
    done = parse_body(Inner4, InnerBoundary),
    %% The inner epilogue is where the outer closing delimiter is.
    {done, Outer4} = parse_headers(Inner4, InnerBoundary),
    {done, <<>>} = parse_headers(Outer4, ?TEST2_BOUNDARY),
    ok.

%% The epilogue of ?TEST3_MIME is returned with its leading CRLF.
parse_epilogue_test() ->
    ExpectedHeaders = [{<<"content-type">>, <<"text/plain">>}],
    ExpectedBody = <<"This is the body of the message.">>,
    ExpectedEpilogue = <<"\r\nThis is the epilogue. Here it includes leading CRLF">>,
    {ok, ExpectedHeaders, AfterHeaders} = parse_headers(?TEST3_MIME, ?TEST3_BOUNDARY),
    {done, ExpectedBody, Closing} = parse_body(AfterHeaders, ?TEST3_BOUNDARY),
    done = parse_body(Closing, ?TEST3_BOUNDARY),
    {done, ExpectedEpilogue} = parse_headers(Closing, ?TEST3_BOUNDARY),
    ok.

%% The epilogue of ?TEST4_MIME is a CRLF and nothing else.
parse_epilogue_crlf_test() ->
    ExpectedHeaders = [{<<"content-type">>, <<"text/plain">>}],
    ExpectedBody = <<"This is the body of the message.">>,
    ExpectedEpilogue = <<"\r\n">>,
    {ok, ExpectedHeaders, AfterHeaders} = parse_headers(?TEST4_MIME, ?TEST4_BOUNDARY),
    {done, ExpectedBody, Closing} = parse_body(AfterHeaders, ?TEST4_BOUNDARY),
    done = parse_body(Closing, ?TEST4_BOUNDARY),
    {done, ExpectedEpilogue} = parse_headers(Closing, ?TEST4_BOUNDARY),
    ok.

%% Parse the example message given in RFC 2046, Section 5.1.1.
parse_rfc2046_test() ->
    %% The first part has no headers.
    ImplicitBody = <<"This is implicitly typed plain US-ASCII text.\r\n"
        "It does NOT end with a linebreak.">>,
    ExplicitHeaders = [{<<"content-type">>, <<"text/plain; charset=us-ascii">>}],
    ExplicitBody = <<"This is explicitly typed plain US-ASCII text.\r\n"
        "It DOES end with a linebreak.\r\n">>,
    ExpectedEpilogue = <<"\r\n\r\nThis is the epilogue. It is also to be ignored.">>,
    {ok, [], Part1Body} = parse_headers(?TEST5_MIME, ?TEST5_BOUNDARY),
    {done, ImplicitBody, Part2} = parse_body(Part1Body, ?TEST5_BOUNDARY),
    {ok, ExplicitHeaders, Part2Body} = parse_headers(Part2, ?TEST5_BOUNDARY),
    {done, ExplicitBody, Closing} = parse_body(Part2Body, ?TEST5_BOUNDARY),
    {done, ExpectedEpilogue} = parse_headers(Closing, ?TEST5_BOUNDARY),
    ok.

%% Without a complete delimiter in the stream, parse_body/2 must
%% stop before any \r that could be the start of one.
parse_partial_test() ->
    %% {Stream, Boundary, ExpectedResult}
    Cases = [
        {<<0:8000, "abcdef\rghij">>, <<"boundary">>,
            {ok, <<0:8000, "abcdef">>, <<"\rghij">>}},
        {<<"abcdef\rghij">>, <<"boundary">>,
            {ok, <<"abcdef">>, <<"\rghij">>}},
        {<<"abc\rdef">>, <<"boundaryboundary">>,
            {ok, <<"abc">>, <<"\rdef">>}},
        {<<0:8000, "abcdef\r\nghij">>, <<"boundary">>,
            {ok, <<0:8000, "abcdef">>, <<"\r\nghij">>}},
        {<<"abcdef\r\nghij">>, <<"boundary">>,
            {ok, <<"abcdef">>, <<"\r\nghij">>}},
        {<<"abc\r\ndef">>, <<"boundaryboundary">>,
            {ok, <<"abc">>, <<"\r\ndef">>}},
        {<<"boundary\r">>, <<"boundary">>,
            {ok, <<"boundary">>, <<"\r">>}},
        {<<"boundary\r\n">>, <<"boundary">>,
            {ok, <<"boundary">>, <<"\r\n">>}},
        {<<"boundary\r\n-">>, <<"boundary">>,
            {ok, <<"boundary">>, <<"\r\n-">>}},
        {<<"boundary\r\n--">>, <<"boundary">>,
            {ok, <<"boundary">>, <<"\r\n--">>}}
    ],
    lists:foreach(fun({Stream, Boundary, Expected}) ->
        Expected = parse_body(Stream, Boundary)
    end, Cases),
    ok.

%% Parse every part of the stream, discarding the results.
perf_parse_multipart(Stream, Boundary) ->
    case parse_headers(Stream, Boundary) of
        {done, _} ->
            ok;
        {ok, _, AfterHeaders} ->
            {_, _, NextPart} = parse_body(AfterHeaders, Boundary),
            perf_parse_multipart(NextPart, Boundary)
    end.

horse_parse() ->
    horse:repeat(50000,
        perf_parse_multipart(?TEST1_MIME, ?TEST1_BOUNDARY)
    ).
-endif.

%% Building.

%% @doc Generate a new random boundary.
%%
%% The boundary is the unpadded base64url encoding of 48 random
%% bytes, making it very unlikely to ever appear in the data.

-spec boundary() -> binary().
boundary() ->
    RandomBytes = crypto:strong_rand_bytes(48),
    cow_base64url:encode(RandomBytes, #{padding => false}).

%% @doc Return the first part's head.
%%
%% This works exactly like the part/2 function except there is
%% no leading \r\n. It's not required to use this function,
%% just makes the output a little smaller and prettier.

-spec first_part(binary(), headers()) -> iodata().
first_part(Boundary, Headers) ->
    HeaderLines = headers_to_iolist(Headers, []),
    [<<"--">>, Boundary, <<"\r\n">>, HeaderLines].

%% @doc Return a part's head.
%%
%% The head is made of the line break ending the previous body,
%% the delimiter, the headers and the empty line that follows them.

-spec part(binary(), headers()) -> iodata().
part(Boundary, Headers) ->
    HeaderLines = headers_to_iolist(Headers, []),
    [<<"\r\n--">>, Boundary, <<"\r\n">>, HeaderLines].

%% Render the headers as "Name: Value\r\n" lines followed by
%% the empty line that ends them.
%%
%% The elements are accumulated in reverse order in a single
%% flat list, which is reversed at the very end. This avoids
%% creating a sublist for each header.
headers_to_iolist(Headers, Acc0) ->
    Reversed = lists:foldl(
        fun({Name, Value}, Acc) ->
            [<<"\r\n">>, Value, <<": ">>, Name|Acc]
        end,
        Acc0, Headers),
    lists:reverse([<<"\r\n">>|Reversed]).

%% @doc Return the closing delimiter of the multipart message.

-spec close(binary()) -> iodata().
close(Boundary) ->
    [<<"\r\n--">>, Boundary, <<"--">>].

-ifdef(TEST).
%% Building the parts of ?TEST1_MIME must give ?TEST1_MIME back.
%% The comparison ignores the case.
build_test() ->
    Expected = string:to_lower(binary_to_list(?TEST1_MIME)),
    Built = iolist_to_binary([
        <<"This is a message with multiple parts in MIME format.\r\n">>,
        first_part(?TEST1_BOUNDARY, [{<<"content-type">>, <<"text/plain">>}]),
        <<"This is the body of the message.">>,
        part(?TEST1_BOUNDARY, [
            {<<"content-type">>, <<"application/octet-stream">>},
            {<<"content-transfer-encoding">>, <<"base64">>}]),
        <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
            "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
        close(?TEST1_BOUNDARY)
    ]),
    Expected = string:to_lower(binary_to_list(Built)),
    ok.

%% What gets built with a random boundary must parse back into
%% the same preamble, headers, bodies and epilogue.
identity_test() ->
    Boundary = boundary(),
    Preamble = <<"This is a message with multiple parts in MIME format.">>,
    TextHeaders = [{<<"content-type">>, <<"text/plain">>}],
    TextBody = <<"This is the body of the message.">>,
    Base64Headers = lists:sort([
        {<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    Base64Body = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
        "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    Epilogue = <<"Gotta go fast!">>,
    Message = iolist_to_binary([
        Preamble,
        part(Boundary, TextHeaders), TextBody,
        part(Boundary, Base64Headers), Base64Body,
        close(Boundary),
        Epilogue
    ]),
    %% The preamble is retrieved by parsing it as if it was a body.
    {done, Preamble, Part1} = parse_body(Message, Boundary),
    {ok, TextHeaders, Part1Body} = parse_headers(Part1, Boundary),
    {done, TextBody, Part2} = parse_body(Part1Body, Boundary),
    {ok, Part2Headers, Part2Body} = parse_headers(Part2, Boundary),
    Base64Headers = lists:sort(Part2Headers),
    {done, Base64Body, Closing} = parse_body(Part2Body, Boundary),
    {done, Epilogue} = parse_headers(Closing, Boundary),
    ok.

%% Build a two parts message similar to ?TEST1_MIME using
%% a random boundary.
perf_build_multipart() ->
    Boundary = boundary(),
    TextHead = first_part(Boundary, [{<<"content-type">>, <<"text/plain">>}]),
    Base64Head = part(Boundary, [
        {<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    [
        <<"preamble\r\n">>,
        TextHead,
        <<"This is the body of the message.">>,
        Base64Head,
        <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
            "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
        close(Boundary),
        <<"epilogue">>
    ].

horse_build() ->
    horse:repeat(50000,
        perf_build_multipart()
    ).
-endif.

%% Headers.

%% @doc Convenience function for extracting information from headers
%% when parsing a multipart/form-data stream.
%%
%% The headers must include a content-disposition header of
%% type form-data that has a name parameter.

-spec form_data(headers() | #{binary() => binary()})
    -> {data, binary()}
    | {file, binary(), binary(), binary()}.
form_data(Headers) when is_map(Headers) ->
    form_data(maps:to_list(Headers));
form_data(Headers) ->
    {_, Disposition} = lists:keyfind(<<"content-disposition">>, 1, Headers),
    {<<"form-data">>, Params} = parse_content_disposition(Disposition),
    {_, FieldName} = lists:keyfind(<<"name">>, 1, Params),
    form_data_result(FieldName, lists:keyfind(<<"filename">>, 1, Params), Headers).

%% The field is a file only when a filename parameter was given.
form_data_result(FieldName, false, _Headers) ->
    {data, FieldName};
form_data_result(FieldName, {_, Filename}, Headers) ->
    {file, FieldName, Filename, form_data_content_type(Headers)}.

%% The content-type of a file defaults to text/plain.
form_data_content_type(Headers) ->
    case lists:keyfind(<<"content-type">>, 1, Headers) of
        false -> <<"text/plain">>;
        {_, Type} -> Type
    end.

-ifdef(TEST).
form_data_test_() ->
    %% {Headers, ExpectedResult}
    Cases = [
        {[{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
            {data, <<"submit-name">>}},
        {[{<<"content-disposition">>,
                <<"form-data; name=\"files\"; filename=\"file1.txt\"">>},
            {<<"content-type">>, <<"text/x-plain">>}],
            {file, <<"files">>, <<"file1.txt">>, <<"text/x-plain">>}}
    ],
    %% The printed headers are used as the title of each test.
    [{lists:flatten(io_lib:format("~p", [Headers])),
        fun() -> Expected = form_data(Headers) end}
        || {Headers, Expected} <- Cases].
-endif.

%% @todo parse_content_description
%% @todo parse_content_id

%% @doc Parse an RFC 2183 content-disposition value.
%%
%% The result is the disposition type followed by the list
%% of parameters in the order they were found.
%% @todo Support RFC 2231.

-spec parse_content_disposition(binary())
    -> {binary(), [{binary(), binary()}]}.
parse_content_disposition(Bin) ->
    parse_cd_type(Bin, <<>>).

%% Accumulate the disposition type until the end of the value or
%% until a semicolon or whitespace, after which parameters follow.
%%
%% The ?LOWER macro comes from cow_inline.hrl, which is not visible
%% here. It presumably appends the lowercased C to Acc before
%% calling parse_cd_type again.
parse_cd_type(<<>>, Acc) ->
    {Acc, []};
parse_cd_type(<< C, Rest/bits >>, Acc)
        when C =:= $\s; C =:= $\t; C =:= $; ->
    {Acc, parse_before_param(Rest, [])};
parse_cd_type(<< C, Rest/bits >>, Acc) ->
    ?LOWER(parse_cd_type, Rest, Acc).

-ifdef(TEST).
parse_content_disposition_test_() ->
    %% {Value, ExpectedResult}
    Cases = [
        {<<"inline">>, {<<"inline">>, []}},
        {<<"attachment">>, {<<"attachment">>, []}},
        {<<"attachment; filename=genome.jpeg;"
            " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>,
            {<<"attachment">>, [
                {<<"filename">>, <<"genome.jpeg">>},
                {<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}
            ]}},
        {<<"form-data; name=\"user\"">>,
            {<<"form-data">>, [{<<"name">>, <<"user">>}]}},
        {<<"form-data; NAME=\"submit-name\"">>,
            {<<"form-data">>, [{<<"name">>, <<"submit-name">>}]}},
        {<<"form-data; name=\"files\"; filename=\"file1.txt\"">>,
            {<<"form-data">>, [
                {<<"name">>, <<"files">>},
                {<<"filename">>, <<"file1.txt">>}
            ]}},
        {<<"file; filename=\"file1.txt\"">>,
            {<<"file">>, [{<<"filename">>, <<"file1.txt">>}]}},
        {<<"file; filename=\"file2.gif\"">>,
            {<<"file">>, [{<<"filename">>, <<"file2.gif">>}]}}
    ],
    %% The value being parsed is used as the title of each test.
    [{Value, fun() -> Expected = parse_content_disposition(Value) end}
        || {Value, Expected} <- Cases].

horse_parse_content_disposition_attachment() ->
    horse:repeat(100000,
        parse_content_disposition(<<"attachment; filename=genome.jpeg;"
            " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)
    ).

horse_parse_content_disposition_form_data() ->
    horse:repeat(100000,
        parse_content_disposition(
            <<"form-data; name=\"files\"; filename=\"file1.txt\"">>)
    ).

horse_parse_content_disposition_inline() ->
    horse:repeat(100000,
        parse_content_disposition(<<"inline">>)
    ).
-endif.

%% @doc Parse an RFC 2045 content-transfer-encoding header.
%%
%% The work is done entirely by the one argument form of the
%% ?LOWER macro from cow_inline.hrl, which is not visible here.
%% Going by the tests, the result is the value in lowercase.

-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
    ?LOWER(Bin).

-ifdef(TEST).
parse_content_transfer_encoding_test_() ->
    %% {Value, ExpectedResult}
    Cases = [
        {<<"7bit">>, <<"7bit">>},
        {<<"7BIT">>, <<"7bit">>},
        {<<"8bit">>, <<"8bit">>},
        {<<"binary">>, <<"binary">>},
        {<<"quoted-printable">>, <<"quoted-printable">>},
        {<<"base64">>, <<"base64">>},
        {<<"Base64">>, <<"base64">>},
        {<<"BASE64">>, <<"base64">>},
        {<<"bAsE64">>, <<"base64">>}
    ],
    %% The value being parsed is used as the title of each test.
    [{Value, fun() -> Expected = parse_content_transfer_encoding(Value) end}
        || {Value, Expected} <- Cases].

horse_parse_content_transfer_encoding() ->
    horse:repeat(100000,
        parse_content_transfer_encoding(<<"QUOTED-PRINTABLE">>)
    ).
-endif.

%% @doc Parse an RFC 2045 content-type header.
%%
%% The result is the type, the subtype and the list of parameters
%% in the order they were found.

-spec parse_content_type(binary())
    -> {binary(), binary(), [{binary(), binary()}]}.
parse_content_type(Bin) ->
    parse_ct_type(Bin, <<>>).

%% Accumulate the type until the slash. There is no clause for the
%% empty binary: a value without a slash is a function_clause error.
%%
%% The ?LOWER macro comes from cow_inline.hrl, which is not visible
%% here. It presumably appends the lowercased C to Acc before
%% calling the given function again.
parse_ct_type(<< $/, Rest/bits >>, Acc) ->
    parse_ct_subtype(Rest, Acc, <<>>);
parse_ct_type(<< C, Rest/bits >>, Acc) ->
    ?LOWER(parse_ct_type, Rest, Acc).

%% Accumulate the subtype until the end of the value or until a
%% semicolon or whitespace, after which parameters follow. The
%% value may only end on a subtype that isn't empty.
parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
    {Type, Subtype, []};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc)
        when C =:= $\s; C =:= $\t; C =:= $; ->
    {Type, Acc, parse_before_param(Rest, [])};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
    ?LOWER(parse_ct_subtype, Rest, Type, Acc).

-ifdef(TEST).
parse_content_type_test_() ->
    %% {Value, ExpectedResult}
    Cases = [
        {<<"image/gif">>,
            {<<"image">>, <<"gif">>, []}},
        {<<"text/plain">>,
            {<<"text">>, <<"plain">>, []}},
        {<<"text/plain; charset=us-ascii">>,
            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
        {<<"text/plain; charset=\"us-ascii\"">>,
            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
        {<<"multipart/form-data; boundary=AaB03x">>,
            {<<"multipart">>, <<"form-data">>,
                [{<<"boundary">>, <<"AaB03x">>}]}},
        {<<"multipart/mixed; boundary=BbC04y">>,
            {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"BbC04y">>}]}},
        {<<"multipart/mixed; boundary=--------">>,
            {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"--------">>}]}},
        {<<"application/x-horse; filename=genome.jpeg;"
            " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
            " charset=us-ascii; empty=; number=12345">>,
            {<<"application">>, <<"x-horse">>, [
                {<<"filename">>, <<"genome.jpeg">>},
                {<<"some-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>},
                {<<"charset">>, <<"us-ascii">>},
                {<<"empty">>, <<>>},
                {<<"number">>, <<"12345">>}
            ]}}
    ],
    %% The value being parsed is used as the title of each test.
    [{Value, fun() -> Expected = parse_content_type(Value) end}
        || {Value, Expected} <- Cases].

%% Benchmarks for values with zero, one and five parameters.

horse_parse_content_type_zero() ->
    horse:repeat(100000,
        parse_content_type(<<"text/plain">>)
    ).

horse_parse_content_type_one() ->
    horse:repeat(100000,
        parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)
    ).

horse_parse_content_type_five() ->
    horse:repeat(100000,
        parse_content_type(<<"application/x-horse; filename=genome.jpeg;"
            " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
            " charset=us-ascii; empty=; number=12345">>)
    ).
-endif.

%% @doc Parse RFC 2045 parameters.

%% Skip the separators (semicolon, space, tab) found before a
%% parameter name. The parameters were accumulated in reverse
%% order and are put back in order when the value ends.
%%
%% The ?LOWER macro comes from cow_inline.hrl, which is not visible
%% here. It presumably appends the lowercased C to its last argument
%% before calling the given function.
parse_before_param(<<>>, Params) ->
    lists:reverse(Params);
parse_before_param(<< C, Rest/bits >>, Params)
        when C =:= $\s; C =:= $\t; C =:= $; ->
    parse_before_param(Rest, Params);
parse_before_param(<< C, Rest/bits >>, Params) ->
    ?LOWER(parse_param_name, Rest, Params, <<>>).

%% Accumulate the parameter name until the equal sign. A name that
%% ends the value gets an empty parameter value.
parse_param_name(<<>>, Params, Acc) ->
    lists:reverse([{Acc, <<>>}|Params]);
parse_param_name(<< $=, Rest/bits >>, Params, Acc) ->
    parse_param_value(Rest, Params, Acc);
parse_param_name(<< C, Rest/bits >>, Params, Acc) ->
    ?LOWER(parse_param_name, Rest, Params, Acc).

%% Look at the first character after the equal sign: the parameter
%% value can be quoted, empty, or an unquoted token. The case of
%% the parameter value is left untouched.
parse_param_value(<<>>, Params, Name) ->
    lists:reverse([{Name, <<>>}|Params]);
parse_param_value(<< $", Rest/bits >>, Params, Name) ->
    parse_param_quoted_value(Rest, Params, Name, <<>>);
parse_param_value(<< C, Rest/bits >>, Params, Name)
        when C =:= $\s; C =:= $\t; C =:= $; ->
    parse_before_param(Rest, [{Name, <<>>}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name) ->
    parse_param_value(Rest, Params, Name, << C >>).

%% Accumulate an unquoted parameter value until a separator
%% or the end of the value.
parse_param_value(<<>>, Params, Name, Acc) ->
    lists:reverse([{Name, Acc}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Acc)
        when C =:= $\s; C =:= $\t; C =:= $; ->
    parse_before_param(Rest, [{Name, Acc}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) ->
    parse_param_value(Rest, Params, Name, << Acc/binary, C >>).

%% Accumulate a quoted parameter value until the closing quote.
%% A backslash escapes the character that follows it.
%%
%% We expect a final $" so no need to test for <<>>.
%% A \r outside of an escape sequence matches no clause.
parse_param_quoted_value(<< $\\, Escaped, Rest/bits >>, Params, Name, Value) ->
    parse_param_quoted_value(Rest, Params, Name, << Value/binary, Escaped >>);
parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Value) ->
    parse_before_param(Rest, [{Name, Value}|Params]);
parse_param_quoted_value(<< Char, Rest/bits >>, Params, Name, Value)
        when Char =/= $\r ->
    parse_param_quoted_value(Rest, Params, Name, << Value/binary, Char >>).