plug_multipart.erl (15979B)
%% Copyright (c) 2014-2015, Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

-module(plug_multipart).

%% Parsing.
-export([
    parse_headers/2,
    parse_body/2
]).

%% Building.
-export([
    boundary/0,
    first_part/2,
    part/2,
    close/1
]).

%% Headers.
-export([
    form_data/1,
    parse_content_disposition/1,
    parse_content_transfer_encoding/1,
    parse_content_type/1
]).

-type headers() :: [{iodata(), iodata()}].
-export_type([headers/0]).

%% LC(C)
%%
%% Lowercase a single ASCII character code. Only $A..$Z are mapped
%% (to $a..$z); every other value is returned unchanged.

-define(LC(C), if
    C >= $A, C =< $Z -> C + 32;
    true -> C
end).

%% LOWER(Bin)
%%
%% Lowercase a whole binary string, one byte at a time, using a
%% binary comprehension. Note that the expansion binds the variable C.

-define(LOWER(Bin), << << (?LC(C)) >> || << C >> <= Bin >>).

%% LOWER(Function, Rest, Acc) and LOWER(Function, Rest, A0, Acc)
%%
%% To be included at the end of a case block that dispatches on the
%% already bound character C: the lowercased C is appended to Acc and
%% Function is called with the result.
%% Defined below for zero or one extra argument (A0).

%% LOWER(Function, Rest, Acc)
%%
%% Append the ASCII-lowercased version of the already bound character C
%% to the binary Acc, then call Function(Rest, NewAcc).
%% Only $A..$Z are changed (shifted by 32 to $a..$z).

-define(LOWER(Function, Rest, Acc),
    if
        C >= $A, C =< $Z ->
            Function(Rest, << Acc/binary, (C + 32) >>);
        true ->
            Function(Rest, << Acc/binary, C >>)
    end).

%% LOWER(Function, Rest, A0, Acc)
%%
%% Same as the three-argument form with one extra argument: append the
%% ASCII-lowercased version of the already bound character C to the
%% binary Acc, then call Function(Rest, A0, NewAcc).

-define(LOWER(Function, Rest, A0, Acc),
    if
        C >= $A, C =< $Z ->
            Function(Rest, A0, << Acc/binary, (C + 32) >>);
        true ->
            Function(Rest, A0, << Acc/binary, C >>)
    end).

%% Parsing.
%%
%% The multipart format is defined in RFC 2046 (section 5.1); the
%% header fields found inside each part are defined in RFC 2045.

%% @doc Parse the headers for the next multipart part.
%%
%% Any preamble found before the boundary is skipped by this function.
%% Use parse_body/2 instead if the preamble must be retrieved.
%%
%% No limit is applied to the size of the input; the caller is
%% responsible for limiting it if needed.

-spec parse_headers(binary(), binary())
    -> more | {more, binary()}
    | {ok, headers(), binary()}
    | {done, binary()}.
%% Return values:
%%   more                 - not enough data, call again with the same
%%                          stream plus more data appended;
%%   {more, Rest}         - not enough data, call again with Rest plus
%%                          more data appended (skipped preamble dropped);
%%   {ok, Headers, Rest}  - headers parsed, Rest starts at the part body.
%%                          Header names are lowercased and the list is in
%%                          reverse order of appearance;
%%   {done, Epilogue}     - the closing delimiter was found.
%%
%% If the stream starts with the boundary we can make a few assumptions
%% and quickly figure out if we got the complete list of headers.
parse_headers(<< "--", Stream/bits >>, Boundary) ->
    BoundarySize = byte_size(Boundary),
    case Stream of
        %% Last boundary. Return the epilogue.
        << Boundary:BoundarySize/binary, "--", Stream2/bits >> ->
            {done, Stream2};
        << Boundary:BoundarySize/binary, Stream2/bits >> ->
            %% We have all the headers only if there is a \r\n\r\n
            %% somewhere in the data after the boundary.
            case binary:match(Stream2, <<"\r\n\r\n">>) of
                nomatch ->
                    more;
                _ ->
                    before_parse_headers(Stream2)
            end;
        %% If there isn't enough to represent Boundary \r\n\r\n
        %% then we definitely don't have all the headers.
        _ when byte_size(Stream) < byte_size(Boundary) + 4 ->
            more;
        %% Otherwise we have preamble data to skip.
        %% We still got rid of the first two misleading bytes.
        _ ->
            skip_preamble(Stream, Boundary)
    end;
%% Otherwise we have preamble data to skip.
parse_headers(Stream, Boundary) ->
    skip_preamble(Stream, Boundary).

%% We need to find the boundary and a \r\n\r\n after that.
%% Since the boundary isn't at the start, it must be right
%% after a \r\n too.
skip_preamble(Stream, Boundary) ->
    case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
        %% No boundary, need more data.
        nomatch ->
            %% The delimiter "\r\n--Boundary" may be split across two
            %% chunks, so up to byte_size(Boundary) + 3 trailing bytes
            %% can belong to it and must be kept. Everything before
            %% that is preamble and can safely be dropped.
            SkipSize = byte_size(Stream) - byte_size(Boundary) - 3,
            case SkipSize > 0 of
                false ->
                    more;
                true ->
                    << _:SkipSize/binary, Stream2/bits >> = Stream,
                    {more, Stream2}
            end;
        {Start, Length} ->
            Start2 = Start + Length,
            << _:Start2/binary, Stream2/bits >> = Stream,
            case Stream2 of
                %% Last boundary. Return the epilogue.
                << "--", Stream3/bits >> ->
                    {done, Stream3};
                _ ->
                    %% Only the data found after the boundary counts:
                    %% a blank line inside the preamble says nothing
                    %% about the headers being complete.
                    case binary:match(Stream2, <<"\r\n\r\n">>) of
                        %% We don't have the full headers.
                        nomatch ->
                            %% Drop the preamble and the \r\n preceding
                            %% the delimiter, but keep "--Boundary" so
                            %% that the next call, made with more data
                            %% appended, finds this same part again.
                            KeepFrom = Start + 2,
                            << _:KeepFrom/binary, Stream4/bits >> = Stream,
                            {more, Stream4};
                        _ ->
                            before_parse_headers(Stream2)
                    end
            end
    end.

%% Called with the data found right after the boundary, once a
%% \r\n\r\n is known to be present in it.
before_parse_headers(<< "\r\n\r\n", Stream/bits >>) ->
    %% This indicates that there are no headers, so we can abort immediately.
    {ok, [], Stream};
before_parse_headers(<< "\r\n", Stream/bits >>) ->
    %% There is a line break right after the boundary, skip it.
    parse_hd_name(Stream, [], <<>>).

%% Accumulate the lowercased header name up to the colon.
parse_hd_name(<< C, Rest/bits >>, H, SoFar) ->
    case C of
        $: -> parse_hd_before_value(Rest, H, SoFar);
        $\s -> parse_hd_name_ws(Rest, H, SoFar);
        $\t -> parse_hd_name_ws(Rest, H, SoFar);
        _ -> ?LOWER(parse_hd_name, Rest, H, SoFar)
    end.

%% Skip whitespace between the header name and the colon.
%% Anything other than whitespace or a colon is a crash.
parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
    case C of
        $\s -> parse_hd_name_ws(Rest, H, Name);
        $\t -> parse_hd_name_ws(Rest, H, Name);
        $: -> parse_hd_before_value(Rest, H, Name)
    end.

%% Skip whitespace between the colon and the header value.
parse_hd_before_value(<< $\s, Rest/bits >>, H, N) ->
    parse_hd_before_value(Rest, H, N);
parse_hd_before_value(<< $\t, Rest/bits >>, H, N) ->
    parse_hd_before_value(Rest, H, N);
parse_hd_before_value(Buffer, H, N) ->
    parse_hd_value(Buffer, H, N, <<>>).

%% Accumulate the header value verbatim up to the end of the line.
parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) ->
    case Rest of
        %% Empty line: this was the last header.
        << "\n\r\n", Rest2/bits >> ->
            {ok, [{Name, SoFar}|Headers], Rest2};
        %% Folded value: the line break and the first whitespace
        %% character of the continuation line are dropped.
        << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t ->
            parse_hd_value(Rest2, Headers, Name, SoFar);
        %% End of this header, another one follows.
        << $\n, Rest2/bits >> ->
            parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>)
    end;
parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) ->
    parse_hd_value(Rest, H, N, << SoFar/binary, C >>).

%% @doc Parse the body of the current multipart part.
%%
%% The body is everything until the next boundary.

-spec parse_body(binary(), binary())
    -> {ok, binary()} | {ok, binary(), binary()}
    | done | {done, binary()} | {done, binary(), binary()}.
parse_body(Stream, Boundary) ->
    BoundarySize = byte_size(Boundary),
    case Stream of
        %% The stream starts with a delimiter: there is no body left.
        << "--", Boundary:BoundarySize/binary, _/bits >> ->
            done;
        _ ->
            Delimiter = << "\r\n--", Boundary/bits >>,
            case binary:match(Stream, Delimiter) of
                %% Delimiter found, this is the last chunk of the body.
                {DelimiterPos, _} ->
                    parse_body_done(split_binary(Stream, DelimiterPos));
                %% No delimiter, but the end of the stream may hold
                %% the beginning of one.
                nomatch ->
                    parse_body_partial(Stream, BoundarySize)
            end
    end.

%% Build the return value once the delimiter has been located. The
%% \r\n that precedes the delimiter is not part of the returned rest.
parse_body_done({Body, << "\r\n" >>}) ->
    {done, Body};
parse_body_done({Body, << "\r\n", Rest/bits >>}) ->
    {done, Body, Rest};
parse_body_done({Body, Rest}) ->
    {done, Body, Rest}.

%% No complete delimiter in the stream. Look for a \r that could be
%% the start of a partial delimiter and hold back everything from it;
%% the data before it is returned as body.
parse_body_partial(Stream, BoundarySize) ->
    StreamSize = byte_size(Stream),
    TailStart = StreamSize - BoundarySize - 3,
    MatchOpts = case TailStart < 0 of
        %% Stream too small to contain a delimiter, check all of it.
        true -> [];
        %% Otherwise only the end of the stream needs to be checked.
        false -> [{scope, {TailStart, StreamSize - TailStart}}]
    end,
    case binary:match(Stream, <<"\r">>, MatchOpts) of
        nomatch ->
            {ok, Stream};
        {CrPos, _} ->
            case split_binary(Stream, CrPos) of
                {Body, <<>>} -> {ok, Body};
                {Body, Rest} -> {ok, Body, Rest}
            end
    end.

%% Building.

%% @doc Generate a new random boundary.
%%
%% The boundary is the Base64 encoding of 48 random bytes, which makes
%% it very unlikely to ever appear in the data.

-spec boundary() -> binary().
boundary() ->
    RandomBytes = crypto:strong_rand_bytes(48),
    base64:encode(RandomBytes).

%% @doc Return the first part's head.
%%
%% Same as part/2 but without the leading \r\n. Using this function
%% is optional, it only makes the output a little smaller and prettier.

-spec first_part(binary(), headers()) -> iodata().
first_part(Boundary, Headers) ->
    HeaderLines = headers_to_iolist(Headers, []),
    [<<"--">>, Boundary, <<"\r\n">>, HeaderLines].

%% @doc Return a part's head.

-spec part(binary(), headers()) -> iodata().
part(Boundary, Headers) ->
    HeaderLines = headers_to_iolist(Headers, []),
    [<<"\r\n--">>, Boundary, <<"\r\n">>, HeaderLines].

%% Render the headers as a flat list "Name: Value\r\n" ... followed by
%% the empty line that ends the header block. Acc holds already
%% rendered elements in reverse order; they end up in front.
headers_to_iolist(Headers, Acc) ->
    Rendered = lists:foldr(
        fun({N, V}, Tail) -> [N, <<": ">>, V, <<"\r\n">>|Tail] end,
        [<<"\r\n">>],
        Headers),
    lists:reverse(Acc, Rendered).

%% @doc Return the closing delimiter of the multipart message.

-spec close(binary()) -> iodata().
close(Boundary) ->
    [<<"\r\n--">>, Boundary, <<"--">>].

%% Headers.

%% @doc Convenience function for extracting information from headers
%% when parsing a multipart/form-data stream.
%%
%% Returns {data, FieldName} when the content-disposition has no
%% filename parameter, and {file, FieldName, Filename, Type,
%% TransferEncoding} otherwise. Type defaults to text/plain and
%% TransferEncoding to 7bit when the matching header is absent.
%% Crashes if there is no form-data content-disposition header or if
%% it has no name parameter.

-spec form_data(headers())
    -> {data, binary()}
    | {file, binary(), binary(), binary(), binary()}.
form_data(Headers) ->
    {_, DispositionBin} = lists:keyfind(<<"content-disposition">>, 1, Headers),
    {<<"form-data">>, Params} = parse_content_disposition(DispositionBin),
    {_, FieldName} = lists:keyfind(<<"name">>, 1, Params),
    case lists:keyfind(<<"filename">>, 1, Params) of
        false ->
            {data, FieldName};
        {_, Filename} ->
            Type = form_data_header(<<"content-type">>,
                Headers, <<"text/plain">>),
            %% @todo Turns out this is unnecessary per RFC7578 4.7.
            TransferEncoding = form_data_header(<<"content-transfer-encoding">>,
                Headers, <<"7bit">>),
            {file, FieldName, Filename, Type, TransferEncoding}
    end.

%% Return the value of the header Name, or Default when it is absent.
form_data_header(Name, Headers, Default) ->
    case lists:keyfind(Name, 1, Headers) of
        false -> Default;
        {_, Value} -> Value
    end.

%% @doc Parse an RFC 2183 content-disposition value.

-spec parse_content_disposition(binary())
    -> {binary(), [{binary(), binary()}]}.
parse_content_disposition(Bin) ->
    parse_cd_type(Bin, <<>>).

%% Accumulate the lowercased disposition type. It ends at the first
%% semicolon, space or tab; parameters are parsed from there.
parse_cd_type(<<>>, Acc) ->
    {Acc, []};
parse_cd_type(<< C, Rest/bits >>, Acc)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    {Acc, parse_before_param(Rest, [])};
parse_cd_type(<< C, Rest/bits >>, Acc) ->
    ?LOWER(parse_cd_type, Rest, Acc).

%% @doc Parse an RFC 2045 content-transfer-encoding header.
%%
%% The value is returned lowercased.

-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
    ?LOWER(Bin).

%% @doc Parse an RFC 2045 content-type header.
%%
%% Returns the lowercased type and subtype along with the parameters.

-spec parse_content_type(binary())
    -> {binary(), binary(), [{binary(), binary()}]}.
parse_content_type(Bin) ->
    parse_ct_type(Bin, <<>>).

%% Accumulate the lowercased type up to the slash.
%% There is no clause for an input without a slash: it crashes.
parse_ct_type(<< $/, Rest/bits >>, Acc) ->
    parse_ct_subtype(Rest, Acc, <<>>);
parse_ct_type(<< C, Rest/bits >>, Acc) ->
    ?LOWER(parse_ct_type, Rest, Acc).

%% Accumulate the lowercased subtype. It ends at the first semicolon,
%% space or tab, or at the end of the input if it is not empty.
parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
    {Type, Subtype, []};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    {Type, Acc, parse_before_param(Rest, [])};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
    ?LOWER(parse_ct_subtype, Rest, Type, Acc).

%% @doc Parse RFC 2045 parameters.

%% Skip separators (semicolon, space, tab) until a parameter name
%% starts. Parameters are returned in order of appearance.
parse_before_param(<<>>, Params) ->
    lists:reverse(Params);
parse_before_param(<< C, Rest/bits >>, Params)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    parse_before_param(Rest, Params);
parse_before_param(<< C, Rest/bits >>, Params) ->
    ?LOWER(parse_param_name, Rest, Params, <<>>).

%% Accumulate the lowercased parameter name up to the equal sign.
%% A name ending the input gets an empty value.
parse_param_name(<<>>, Params, Acc) ->
    lists:reverse([{Acc, <<>>}|Params]);
parse_param_name(<< $=, Rest/bits >>, Params, Acc) ->
    parse_param_value(Rest, Params, Acc);
parse_param_name(<< C, Rest/bits >>, Params, Acc) ->
    ?LOWER(parse_param_name, Rest, Params, Acc).

%% First character of a parameter value: decide between a quoted
%% value, an empty value and a plain token.
parse_param_value(<<>>, Params, Name) ->
    lists:reverse([{Name, <<>>}|Params]);
parse_param_value(<< $", Rest/bits >>, Params, Name) ->
    parse_param_quoted_value(Rest, Params, Name, <<>>);
parse_param_value(<< C, Rest/bits >>, Params, Name)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    parse_before_param(Rest, [{Name, <<>>}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name) ->
    parse_param_value(Rest, Params, Name, << C >>).

%% Accumulate an unquoted value verbatim (no case change). It ends at
%% the first semicolon, space or tab, or at the end of the input.
parse_param_value(<<>>, Params, Name, Acc) ->
    lists:reverse([{Name, Acc}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Acc)
        when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
    parse_before_param(Rest, [{Name, Acc}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) ->
    parse_param_value(Rest, Params, Name, << Acc/binary, C >>).

%% Accumulate a quoted value up to the closing quote. A backslash
%% escapes the character that follows it. A bare \r is not accepted.
%% We expect a final $" so there is no clause for <<>>.
parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Acc) ->
    parse_before_param(Rest, [{Name, Acc}|Params]);
parse_param_quoted_value(<< $\\, Escaped, Rest/bits >>, Params, Name, Acc) ->
    parse_param_quoted_value(Rest, Params, Name, << Acc/binary, Escaped >>);
parse_param_quoted_value(<< Char, Rest/bits >>, Params, Name, Acc)
        when Char =/= $\r ->
    parse_param_quoted_value(Rest, Params, Name, << Acc/binary, Char >>).