zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

plug_multipart.erl (15979B)


      1 %% Copyright (c) 2014-2015, Loïc Hoguin <essen@ninenines.eu>
      2 %%
      3 %% Permission to use, copy, modify, and/or distribute this software for any
      4 %% purpose with or without fee is hereby granted, provided that the above
      5 %% copyright notice and this permission notice appear in all copies.
      6 %%
      7 %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      8 %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      9 %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     10 %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     11 %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     12 %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     13 %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     14 
     15 -module(plug_multipart).
     16 
     17 %% Parsing: incremental parsers for the parts of a multipart stream.
     18 -export([parse_headers/2]).
     19 -export([parse_body/2]).
     20 
     21 %% Building: iodata fragments used to produce a multipart message.
     22 -export([boundary/0]).
     23 -export([first_part/2]).
     24 -export([part/2]).
     25 -export([close/1]).
     26 
     27 %% Headers: helpers to interpret the values of individual part headers.
     28 -export([form_data/1]).
     29 -export([parse_content_disposition/1]).
     30 -export([parse_content_transfer_encoding/1]).
     31 -export([parse_content_type/1]).
     32 
     33 -type headers() :: [{iodata(), iodata()}]. %% List of {Name, Value} pairs.
     34 -export_type([headers/0]).
     35 
     36 -define(LC(C), case C of %% Lowercase a single byte: only $A..$Z are changed.
     37   $A -> $a;
     38   $B -> $b;
     39   $C -> $c;
     40   $D -> $d;
     41   $E -> $e;
     42   $F -> $f;
     43   $G -> $g;
     44   $H -> $h;
     45   $I -> $i;
     46   $J -> $j;
     47   $K -> $k;
     48   $L -> $l;
     49   $M -> $m;
     50   $N -> $n;
     51   $O -> $o;
     52   $P -> $p;
     53   $Q -> $q;
     54   $R -> $r;
     55   $S -> $s;
     56   $T -> $t;
     57   $U -> $u;
     58   $V -> $v;
     59   $W -> $w;
     60   $X -> $x;
     61   $Y -> $y;
     62   $Z -> $z;
     63   _ -> C %% Anything else is returned unchanged.
     64 end).
     65 
     66 %% LOWER(Bin)
     67 %%
     68 %% Build a new binary by running every byte of Bin through ?LC.
     69 
     70 -define(LOWER(Bin), << << ?LC(Byte) >> || << Byte >> <= Bin >>).
     71 
     72 %% LOWER(Function, Rest, Acc) and LOWER(Function, Rest, A0, Acc)
     73 %%
     74 %% To be included at the end of a case block that has already bound C
     75 %% to the current byte: calls Function with C lowercased and appended to Acc.
     76 
     77 -define(LOWER(Function, Rest, Acc), case C of
     78   $A -> Function(Rest, << Acc/binary, $a >>);
     79   $B -> Function(Rest, << Acc/binary, $b >>);
     80   $C -> Function(Rest, << Acc/binary, $c >>);
     81   $D -> Function(Rest, << Acc/binary, $d >>);
     82   $E -> Function(Rest, << Acc/binary, $e >>);
     83   $F -> Function(Rest, << Acc/binary, $f >>);
     84   $G -> Function(Rest, << Acc/binary, $g >>);
     85   $H -> Function(Rest, << Acc/binary, $h >>);
     86   $I -> Function(Rest, << Acc/binary, $i >>);
     87   $J -> Function(Rest, << Acc/binary, $j >>);
     88   $K -> Function(Rest, << Acc/binary, $k >>);
     89   $L -> Function(Rest, << Acc/binary, $l >>);
     90   $M -> Function(Rest, << Acc/binary, $m >>);
     91   $N -> Function(Rest, << Acc/binary, $n >>);
     92   $O -> Function(Rest, << Acc/binary, $o >>);
     93   $P -> Function(Rest, << Acc/binary, $p >>);
     94   $Q -> Function(Rest, << Acc/binary, $q >>);
     95   $R -> Function(Rest, << Acc/binary, $r >>);
     96   $S -> Function(Rest, << Acc/binary, $s >>);
     97   $T -> Function(Rest, << Acc/binary, $t >>);
     98   $U -> Function(Rest, << Acc/binary, $u >>);
     99   $V -> Function(Rest, << Acc/binary, $v >>);
    100   $W -> Function(Rest, << Acc/binary, $w >>);
    101   $X -> Function(Rest, << Acc/binary, $x >>);
    102   $Y -> Function(Rest, << Acc/binary, $y >>);
    103   $Z -> Function(Rest, << Acc/binary, $z >>);
    104   C -> Function(Rest, << Acc/binary, C >>) %% C is already bound: matches any other byte.
    105 end).
    106 
    107 -define(LOWER(Function, Rest, A0, Acc), case C of %% Like the 3-argument form; A0 is passed through.
    108   $A -> Function(Rest, A0, << Acc/binary, $a >>);
    109   $B -> Function(Rest, A0, << Acc/binary, $b >>);
    110   $C -> Function(Rest, A0, << Acc/binary, $c >>);
    111   $D -> Function(Rest, A0, << Acc/binary, $d >>);
    112   $E -> Function(Rest, A0, << Acc/binary, $e >>);
    113   $F -> Function(Rest, A0, << Acc/binary, $f >>);
    114   $G -> Function(Rest, A0, << Acc/binary, $g >>);
    115   $H -> Function(Rest, A0, << Acc/binary, $h >>);
    116   $I -> Function(Rest, A0, << Acc/binary, $i >>);
    117   $J -> Function(Rest, A0, << Acc/binary, $j >>);
    118   $K -> Function(Rest, A0, << Acc/binary, $k >>);
    119   $L -> Function(Rest, A0, << Acc/binary, $l >>);
    120   $M -> Function(Rest, A0, << Acc/binary, $m >>);
    121   $N -> Function(Rest, A0, << Acc/binary, $n >>);
    122   $O -> Function(Rest, A0, << Acc/binary, $o >>);
    123   $P -> Function(Rest, A0, << Acc/binary, $p >>);
    124   $Q -> Function(Rest, A0, << Acc/binary, $q >>);
    125   $R -> Function(Rest, A0, << Acc/binary, $r >>);
    126   $S -> Function(Rest, A0, << Acc/binary, $s >>);
    127   $T -> Function(Rest, A0, << Acc/binary, $t >>);
    128   $U -> Function(Rest, A0, << Acc/binary, $u >>);
    129   $V -> Function(Rest, A0, << Acc/binary, $v >>);
    130   $W -> Function(Rest, A0, << Acc/binary, $w >>);
    131   $X -> Function(Rest, A0, << Acc/binary, $x >>);
    132   $Y -> Function(Rest, A0, << Acc/binary, $y >>);
    133   $Z -> Function(Rest, A0, << Acc/binary, $z >>);
    134   C -> Function(Rest, A0, << Acc/binary, C >>) %% C is already bound: matches any other byte.
    135 end).
    136 
    137 %% Parsing.
    138 %%
    139 %% The multipart format is defined in RFC 2046 (section 5.1).
    140 
    141 %% @doc Parse the headers of the next part found in the stream.
    142 %%
    143 %% Any preamble located before the boundary is skipped over.
    144 %% Call parse_body/2 instead when the preamble has to be retrieved.
    145 %%
    146 %% No limit is placed on the size of the input; callers
    147 %% that need a limit have to enforce it themselves.
    148 
    149 -spec parse_headers(binary(), binary())
    150   -> more | {more, binary()}
    151   | {ok, headers(), binary()}
    152   | {done, binary()}.
    153 %% Fast path: the stream begins with "--", so the boundary may follow
    154 %% immediately and we can check at once whether the headers are complete.
    155 parse_headers(<< "--", AfterDashes/bits >>, Boundary) ->
    156   Size = byte_size(Boundary),
    157   case AfterDashes of
    158     %% Closing delimiter: what follows is the epilogue.
    159     << Boundary:Size/binary, "--", Epilogue/bits >> ->
    160       {done, Epilogue};
    161     << Boundary:Size/binary, AfterBoundary/bits >> ->
    162       %% The header block is complete only once a \r\n\r\n
    163       %% shows up somewhere after the boundary.
    164       case binary:match(AfterBoundary, <<"\r\n\r\n">>) of
    165         {_, _} ->
    166           before_parse_headers(AfterBoundary);
    167         nomatch ->
    168           more
    169       end;
    170     %% Fewer bytes than Boundary plus \r\n\r\n:
    171     %% the headers cannot all be here yet.
    172     _ when byte_size(AfterDashes) < Size + 4 ->
    173       more;
    174     %% Anything else is preamble. The two leading
    175     %% dashes were misleading and are already dropped.
    176     _ ->
    177       skip_preamble(AfterDashes, Boundary)
    178   end;
    179 %% The stream does not begin with "--": treat it as preamble.
    180 parse_headers(Stream, Boundary) ->
    181   skip_preamble(Stream, Boundary).
    182 
    183 %% The boundary is not at the start of the stream, so it has to follow
    184 %% a \r\n. The headers are complete once a \r\n\r\n is found after the
    185 %% boundary itself (a \r\n\r\n inside the preamble does not count).
    186 skip_preamble(Stream, Boundary) ->
    187   case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
    188     nomatch ->
    189       %% No boundary, need more data. Only the last byte_size(Boundary) + 3
    190       %% bytes can be the start of a delimiter; everything before is skipped.
    191       SkipSize = byte_size(Stream) - byte_size(Boundary) - 3,
    192       case SkipSize > 0 of
    193         false ->
    194           more;
    195         true ->
    196           << _:SkipSize/binary, Stream2/bits >> = Stream,
    197           {more, Stream2}
    198       end;
    199     {Start, Length} ->
    200       Start2 = Start + Length,
    201       << _:Start2/binary, Stream2/bits >> = Stream,
    202       case Stream2 of
    203         %% Last boundary. Return the epilogue.
    204         << "--", Stream3/bits >> ->
    205           {done, Stream3};
    206         _ ->
    207           %% Look for the end of the headers after the boundary only.
    208           case binary:match(Stream2, <<"\r\n\r\n">>) of
    209             nomatch ->
    210               %% Incomplete: keep "--Boundary" so parse_headers/2 can resume.
    211               {more, binary:part(Stream, Start + 2, byte_size(Stream) - Start - 2)};
    212             _ ->
    213               before_parse_headers(Stream2)
    214           end
    215       end
    216   end.
    217 
    218 %% Entry point once the boundary itself has been consumed.
    219 before_parse_headers(<< $\r, $\n, $\r, $\n, Rest/bits >>) ->
    220   {ok, [], Rest}; %% Blank line right away: the part has no headers.
    221 before_parse_headers(<< $\r, $\n, HeaderLines/bits >>) ->
    222   %% Drop the line break that terminates the boundary line.
    223   parse_hd_name(HeaderLines, [], <<>>).
    224 
    225 parse_hd_name(<< $:, Rest/bits >>, H, Name) ->
    226   parse_hd_before_value(Rest, H, Name);
    227 parse_hd_name(<< WS, Rest/bits >>, H, Name) when WS =:= $\s; WS =:= $\t ->
    228   parse_hd_name_ws(Rest, H, Name);
    229 parse_hd_name(<< C, Rest/bits >>, H, Name) ->
    230   %% ?LOWER expands to a `case C of`, so the byte has to be named C.
    231   ?LOWER(parse_hd_name, Rest, H, Name).
    232 
    233 %% Skip the whitespace found between a header name and its colon.
    234 parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
    235   case C of
    236     $: -> parse_hd_before_value(Rest, H, Name);
    237     _ when C =:= $\s; C =:= $\t -> parse_hd_name_ws(Rest, H, Name)
    238   end.
    239 
    240 %% Skip the optional whitespace that separates the colon from the value.
    241 parse_hd_before_value(<< WS, Tail/bits >>, Headers, Name)
    242     when WS =:= $\s; WS =:= $\t ->
    243   parse_hd_before_value(Tail, Headers, Name);
    244 parse_hd_before_value(Value, Headers, Name) ->
    245   parse_hd_value(Value, Headers, Name, <<>>).
    246 
    247 %% Accumulate the value bytes up to the \r\n that ends the header line.
    248 parse_hd_value(<< $\r, AfterCR/bits >>, Headers, Name, Value) ->
    249   case AfterCR of
    250     %% Blank line: last header. Headers are returned most recent first.
    251     << $\n, $\r, $\n, Rest/bits >> -> {ok, [{Name, Value}|Headers], Rest};
    252     %% Folded line: the line break and one whitespace byte are dropped.
    253     << $\n, WS, More/bits >> when WS =:= $\s; WS =:= $\t -> parse_hd_value(More, Headers, Name, Value);
    254     << $\n, Next/bits >> -> parse_hd_name(Next, [{Name, Value}|Headers], <<>>)
    255   end;
    256 parse_hd_value(<< Byte, Tail/bits >>, Headers, Name, Value) ->
    257   parse_hd_value(Tail, Headers, Name, << Value/binary, Byte >>).
    258 
    259 %% @doc Parse the body of the part currently being read.
    260 %%
    261 %% The body consists of all the data found before the next boundary.
    262 
    263 -spec parse_body(binary(), binary())
    264   -> {ok, binary()} | {ok, binary(), binary()}
    265   | done | {done, binary()} | {done, binary(), binary()}.
    266 parse_body(Stream, Boundary) ->
    267   Size = byte_size(Boundary),
    268   case Stream of
    269     << "--", Boundary:Size/binary, _/bits >> ->
    270       done;
    271     _ ->
    272       case binary:match(Stream, << "\r\n--", Boundary/bits >>) of
    273         %% The delimiter is present: the body ends right before it.
    274         {End, _} ->
    275           case Stream of
    276             << Last:End/binary, "\r\n" >> ->
    277               {done, Last};
    278             << Last:End/binary, "\r\n", Next/bits >> ->
    279               {done, Last, Next};
    280             << Last:End/binary, Next/bits >> ->
    281               {done, Last, Next}
    282           end;
    283         %% No delimiter yet. A \r near the end could be the start of one,
    284         %% so the body is only returned up to that byte.
    285         nomatch ->
    286           Total = byte_size(Stream),
    287           TailStart = Total - Size - 3,
    288           Opts = case TailStart < 0 of
    289             %% Stream shorter than a delimiter: scan all of it.
    290             true -> [];
    291             %% Otherwise scanning the last Size + 3 bytes is enough.
    292             false -> [{scope, {TailStart, Size + 3}}]
    293           end,
    294           case binary:match(Stream, <<"\r">>, Opts) of
    295             nomatch ->
    296               {ok, Stream};
    297             {Cut, _} ->
    298               case Stream of
    299                 << Chunk:Cut/binary >> ->
    300                   {ok, Chunk};
    301                 << Chunk:Cut/binary, Kept/bits >> ->
    302                   {ok, Chunk, Kept}
    303               end
    304           end
    305       end
    306   end.
    307 
    308 %% Building.
    309 
    310 %% @doc Return a freshly generated random boundary.
    311 %%
    312 %% It is very unlikely for the generated boundary to occur in the data.
    313 
    314 -spec boundary() -> binary().
    315 boundary() ->
    316   Random = crypto:strong_rand_bytes(48),
    317   base64:encode(Random).
    318 
    319 %% @doc Return the head of the very first part.
    320 %%
    321 %% Identical to part/2 minus the leading \r\n. Using it is optional:
    322 %% it only makes the output slightly smaller and nicer to read.
    323 
    324 -spec first_part(binary(), headers()) -> iodata().
    325 first_part(Boundary, Headers) ->
    326   HeaderLines = headers_to_iolist(Headers, []),
    327   [<<"--">>, Boundary, <<"\r\n">>, HeaderLines].
    328 
    329 %% @doc Return the head of a part: its delimiter line followed by its headers.
    330 -spec part(binary(), headers()) -> iodata().
    331 part(Boundary, Headers) ->
    332   HeaderLines = headers_to_iolist(Headers, []),
    333   [<<"\r\n--">>, Boundary, <<"\r\n">>, HeaderLines].
    334 
    335 %% Build the header block backwards in Acc, then reverse it in one go.
    336 headers_to_iolist([{Name, Value}|Rest], Acc) ->
    337   %% Pushing the four pieces in reverse avoids nesting a sublist per header.
    338   headers_to_iolist(Rest, [<<"\r\n">>, Value, <<": ">>, Name|Acc]);
    339 headers_to_iolist([], Acc) ->
    340   lists:reverse([<<"\r\n">>|Acc]). %% The extra \r\n ends the header block.
    341 
    342 %% @doc Return the delimiter that terminates the whole multipart message.
    343 -spec close(binary()) -> iodata().
    344 close(Boundary) ->
    345   Dashes = <<"--">>,
    346   [<<"\r\n--">>, Boundary, Dashes].
    347 
    348 %% Headers.
    349 
    350 %% @doc Convenience helper summarizing the headers of one part
    351 %% of a multipart/form-data stream.
    352 
    353 -spec form_data(headers())
    354   -> {data, binary()}
    355   | {file, binary(), binary(), binary(), binary()}.
    356 form_data(Headers) ->
    357   %% A missing or non form-data disposition, or a missing name, is a badmatch.
    358   {_, Disposition} = lists:keyfind(<<"content-disposition">>, 1, Headers),
    359   {<<"form-data">>, Params} = parse_content_disposition(Disposition),
    360   {_, Field} = lists:keyfind(<<"name">>, 1, Params),
    361   case lists:keyfind(<<"filename">>, 1, Params) of
    362     {_, Filename} ->
    363       ContentType = case lists:keyfind(<<"content-type">>, 1, Headers) of
    364         {_, CT} -> CT;
    365         false -> <<"text/plain">>
    366       end,
    367       %% @todo Turns out this is unnecessary per RFC7578 4.7.
    368       Encoding = case lists:keyfind(<<"content-transfer-encoding">>, 1, Headers) of
    369         {_, CTE} -> CTE;
    370         false -> <<"7bit">>
    371       end,
    372       {file, Field, Filename, ContentType, Encoding};
    373     false ->
    374       {data, Field}
    375   end.
    376 
    377 %% @doc Parse an RFC 2183 content-disposition value.
    378 %% Returns {Type, Params}; the type and the parameter names are lowercased.
    379 -spec parse_content_disposition(binary())
    380   -> {binary(), [{binary(), binary()}]}.
    381 parse_content_disposition(Bin) ->
    382   parse_cd_type(Bin, <<>>).
    383 
    384 %% Accumulate the lowercased disposition type up to the first separator.
    385 parse_cd_type(<<>>, Type) ->
    386   {Type, []};
    387 parse_cd_type(<< Sep, Rest/bits >>, Type)
    388     when Sep =:= $\s; Sep =:= $\t; Sep =:= $; ->
    389   {Type, parse_before_param(Rest, [])};
    390 parse_cd_type(<< C, Rest/bits >>, Type) ->
    391   %% ?LOWER expands to a `case C of`, so the byte has to be named C.
    392   ?LOWER(parse_cd_type, Rest, Type).
    393 
    394 %% @doc Parse an RFC 2045 content-transfer-encoding header.
    395 %% The value is returned as is, except that the letters A-Z are lowercased.
    396 -spec parse_content_transfer_encoding(binary()) -> binary().
    397 parse_content_transfer_encoding(Bin) ->
    398   ?LOWER(Bin).
    399 
    400 %% @doc Parse an RFC 2045 content-type header.
    401 %% Returns {Type, Subtype, Params}; type, subtype and parameter names are lowercased.
    402 -spec parse_content_type(binary())
    403   -> {binary(), binary(), [{binary(), binary()}]}.
    404 parse_content_type(Bin) ->
    405   parse_ct_type(Bin, <<>>).
    406 
    407 %% Read the lowercased type up to the slash; input without a slash crashes.
    408 parse_ct_type(<< $/, Rest/bits >>, Type) ->
    409   parse_ct_subtype(Rest, Type, <<>>);
    410 parse_ct_type(<< C, Rest/bits >>, Type) ->
    411   ?LOWER(parse_ct_type, Rest, Type).
    412 
    413 %% Read the lowercased subtype; an empty subtype at end of input crashes.
    414 parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
    415   {Type, Subtype, []};
    416 parse_ct_subtype(<< Sep, Rest/bits >>, Type, Subtype)
    417     when Sep =:= $\s; Sep =:= $\t; Sep =:= $; ->
    418   {Type, Subtype, parse_before_param(Rest, [])};
    419 parse_ct_subtype(<< C, Rest/bits >>, Type, Subtype) ->
    420   %% ?LOWER expands to a `case C of`, so the byte has to be named C.
    421   ?LOWER(parse_ct_subtype, Rest, Type, Subtype).
    422 
    423 %% @doc Parse RFC 2045 parameters.
    424 %%
    425 %% Skip separators, then start reading a parameter name at the next byte.
    426 parse_before_param(<<>>, Params) ->
    427   lists:reverse(Params);
    428 parse_before_param(<< Sep, Rest/bits >>, Params)
    429     when Sep =:= $\s; Sep =:= $\t; Sep =:= $; ->
    430   parse_before_param(Rest, Params);
    431 parse_before_param(<< C, Rest/bits >>, Params) ->
    432   %% C is the first byte of the name; ?LOWER lowercases and stores it.
    433   ?LOWER(parse_param_name, Rest, Params, <<>>).
    434 
    435 %% Read a lowercased parameter name; without "=" the value is empty.
    436 parse_param_name(<<>>, Params, Name) ->
    437   lists:reverse([{Name, <<>>}|Params]);
    438 parse_param_name(<< $=, Rest/bits >>, Params, Name) ->
    439   parse_param_value(Rest, Params, Name);
    440 parse_param_name(<< C, Rest/bits >>, Params, Name) ->
    441   ?LOWER(parse_param_name, Rest, Params, Name).
    442 
    443 %% Decide how to read a parameter value from its first byte.
    444 parse_param_value(<<>>, Params, Name) ->
    445   lists:reverse([{Name, <<>>}|Params]);
    446 parse_param_value(<< $", Rest/bits >>, Params, Name) ->
    447   parse_param_quoted_value(Rest, Params, Name, <<>>);
    448 parse_param_value(<< Sep, Rest/bits >>, Params, Name)
    449     when Sep =:= $\s; Sep =:= $\t; Sep =:= $; ->
    450   parse_before_param(Rest, [{Name, <<>>}|Params]); %% Empty value.
    451 parse_param_value(<< First, Rest/bits >>, Params, Name) ->
    452   parse_param_value(Rest, Params, Name, << First >>).
    453 
    454 %% Accumulate an unquoted value until a separator or the end of input.
    455 parse_param_value(<<>>, Params, Name, Value) ->
    456   lists:reverse([{Name, Value}|Params]);
    457 parse_param_value(<< Sep, Rest/bits >>, Params, Name, Value)
    458     when Sep =:= $\s; Sep =:= $\t; Sep =:= $; ->
    459   parse_before_param(Rest, [{Name, Value}|Params]);
    460 parse_param_value(<< Byte, Rest/bits >>, Params, Name, Value) ->
    461   %% Any other byte belongs to the value.
    462   parse_param_value(Rest, Params, Name, << Value/binary, Byte >>).
    463 
    464 %% We expect a final $" so there is no clause for <<>>: input that ends early crashes.
    465 parse_param_quoted_value(<< $\\, C, Rest/bits >>, Params, Name, Acc) ->
    466   parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>); %% Backslash dropped, C kept verbatim.
    467 parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Acc) ->
    468   parse_before_param(Rest, [{Name, Acc}|Params]); %% Closing quote ends the value.
    469 parse_param_quoted_value(<< C, Rest/bits >>, Params, Name, Acc)
    470     when C =/= $\r -> %% A bare \r has no matching clause.
    471   parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>).