cow_http_struct_hd.erl (13724B)
%% Copyright (c) 2019, Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

%% The mapping between Erlang and structured headers types is as follows:
%%
%% List: list()
%% Dictionary: map()
%%   (parse_dictionary/1 returns {Map, Keys} where Keys is the list
%%   of keys in the order they appeared in the header)
%% Bare item: one bare_item() that can be of type:
%%   Integer: integer()
%%   Float: float()
%%   String: {string, binary()}
%%   Token: {token, binary()}
%%   Byte sequence: {binary, binary()}
%%   Boolean: boolean()
%% And finally:
%%   Type with Parameters: {with_params, Type, Parameters}
%%   Parameters: #{binary() => bare_item() | undefined}
%%     (undefined is used for a parameter that has no value)

-module(cow_http_struct_hd).

%% Parsing functions.
-export([parse_dictionary/1]).
-export([parse_item/1]).
-export([parse_list/1]).
%% Building functions.
-export([dictionary/1]).
-export([item/1]).
-export([list/1]).

-include("cow_parse.hrl").

%% A list whose members are either items or inner lists.
-type sh_list() :: [sh_item() | sh_inner_list()].
%% An inner list: a list of items that carries its own parameters.
-type sh_inner_list() :: sh_with_params([sh_item()]).
%% Parameters attached to an item or inner list, keyed by name.
%% A parameter given without a value maps to undefined.
-type sh_params() :: #{binary() => sh_bare_item() | undefined}.
%% A dictionary: the members keyed by name, together with the list of keys.
-type sh_dictionary() :: {#{binary() => sh_item() | sh_inner_list()}, [binary()]}.
%% An item: a bare item together with its parameters.
-type sh_item() :: sh_with_params(sh_bare_item()).
%% A bare value; strings, tokens and byte sequences are tagged binaries.
-type sh_bare_item() :: integer() | float() | boolean()
	| {string | token | binary, binary()}.
%% Wrapper pairing a value (bare item or list of items) with its parameters.
-type sh_with_params(Type) :: {with_params, Type, sh_params()}.

%% Guard test: true when C is a lowercase ASCII letter.
%% Every use in this module applies it to a byte taken from a binary.
-define(IS_LC_ALPHA(C), ((C >= $a) and (C =< $z))).

%% Parsing.

%% Parse a dictionary header value. Returns the members as a map along
%% with the list of keys in the order they appeared. The whole input must
%% be consumed; any input that is not accepted crashes the call.
-spec parse_dictionary(binary()) -> sh_dictionary().
parse_dictionary(<<>>) ->
	{#{}, []};
parse_dictionary(<<First,Rest/bits>>) when ?IS_LC_ALPHA(First) ->
	{Members, Keys, <<>>} = parse_dict_key(Rest, #{}, [], <<First>>),
	{Members, Keys}.

%% Accumulate the characters of a key until the "=" is found, then
%% parse the value and store it. A key that is already present in the
%% dictionary fails the match on false.
parse_dict_key(<<$=,Rest0/bits>>, Members, Keys, Key) ->
	false = maps:is_key(Key, Members),
	{Value, Rest} = parse_dict_value(Rest0),
	parse_dict_before_sep(Rest, Members#{Key => Value}, [Key|Keys]);
parse_dict_key(<<C,Rest/bits>>, Members, Keys, Key)
		when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
			or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
	parse_dict_key(Rest, Members, Keys, <<Key/binary,C>>).

%% A value starting with "(" is an inner list, anything else is an item.
parse_dict_value(<<$(,Rest/bits>>) ->
	parse_inner_list(Rest, []);
parse_dict_value(Rest) ->
	parse_item1(Rest).

%% After a member: skip whitespace, then expect a comma or the end of
%% the input. The keys were accumulated in reverse and are put back in
%% order of appearance here.
parse_dict_before_sep(<<>>, Members, Keys) ->
	{Members, lists:reverse(Keys), <<>>};
parse_dict_before_sep(<<$,,Rest/bits>>, Members, Keys) ->
	parse_dict_before_member(Rest, Members, Keys);
parse_dict_before_sep(<<C,Rest/bits>>, Members, Keys) when ?IS_WS(C) ->
	parse_dict_before_sep(Rest, Members, Keys).

%% After a comma: skip whitespace, then expect the first character of
%% the next key. There is no clause for the end of input.
parse_dict_before_member(<<C,Rest/bits>>, Members, Keys) when ?IS_LC_ALPHA(C) ->
	parse_dict_key(Rest, Members, Keys, <<C>>);
parse_dict_before_member(<<C,Rest/bits>>, Members, Keys) when ?IS_WS(C) ->
	parse_dict_before_member(Rest, Members, Keys).

%% Parse an item header value. The whole input must be consumed.
-spec parse_item(binary()) -> sh_item().
parse_item(Bin) ->
	{Parsed, <<>>} = parse_item1(Bin),
	Parsed.
%% Parse one item (bare item plus optional parameters) from the start
%% of the input and return it with the unparsed remainder.
parse_item1(Bin) ->
	{Bare, Rest0} = parse_bare_item(Bin),
	{Params, Rest} = parse_params_opt(Rest0),
	{{with_params, Bare, Params}, Rest}.

%% Parameters are only present when the value is directly followed
%% by ";". Otherwise the parameters are an empty map.
parse_params_opt(<<$;,Rest/bits>>) ->
	parse_before_param(Rest, #{});
parse_params_opt(Rest) ->
	{#{}, Rest}.

%% Parse a list header value. The members are returned in order.
-spec parse_list(binary()) -> sh_list().
parse_list(<<>>) ->
	[];
parse_list(Bin) ->
	parse_list_before_member(Bin, []).

%% Parse one list member and push it on the accumulator.
parse_list_member(Bin, Acc) ->
	{Member, Rest} = parse_list_value(Bin),
	parse_list_before_sep(Rest, [Member|Acc]).

%% A member starting with "(" is an inner list, anything else is an item.
parse_list_value(<<$(,Rest/bits>>) ->
	parse_inner_list(Rest, []);
parse_list_value(Rest) ->
	parse_item1(Rest).

%% After a member: skip whitespace, then expect a comma or the end of
%% the input, at which point the accumulated members are put in order.
parse_list_before_sep(<<>>, Acc) ->
	lists:reverse(Acc);
parse_list_before_sep(<<$,,Rest/bits>>, Acc) ->
	parse_list_before_member(Rest, Acc);
parse_list_before_sep(<<C,Rest/bits>>, Acc) when ?IS_WS(C) ->
	parse_list_before_sep(Rest, Acc).

%% Before a member: skip whitespace then parse the member.
parse_list_before_member(<<C,Rest/bits>>, Acc) when ?IS_WS(C) ->
	parse_list_before_member(Rest, Acc);
parse_list_before_member(Rest, Acc) ->
	parse_list_member(Rest, Acc).

%% Internal.

%% Parse the inside of an inner list; the opening "(" was already
%% consumed by the caller. Returns the inner list and the remainder.
parse_inner_list(<<C,Rest/bits>>, Acc) when ?IS_WS(C) ->
	parse_inner_list(Rest, Acc);
parse_inner_list(<<$),Rest0/bits>>, Acc) ->
	{Params, Rest} = parse_params_opt(Rest0),
	{{with_params, lists:reverse(Acc), Params}, Rest};
parse_inner_list(Bin, Acc) ->
	%% An item inside an inner list must be followed by either
	%% a space or the closing parenthesis.
	{Item, <<Next,_/bits>> = Rest} = parse_item1(Bin),
	true = (Next =:= $\s) orelse (Next =:= $)),
	parse_inner_list(Rest, [Item|Acc]).

%% Before a parameter: skip whitespace, then expect the first
%% character of the parameter key.
parse_before_param(<<C,Rest/bits>>, Params) when ?IS_LC_ALPHA(C) ->
	parse_param(Rest, Params, <<C>>);
parse_before_param(<<C,Rest/bits>>, Params) when ?IS_WS(C) ->
	parse_before_param(Rest, Params).
%% Parse the rest of a parameter. K holds the key characters read so
%% far and Acc the parameters parsed before this one.
%%
%% Returns {Params, Rest} once the last parameter has been read.
%% A parameter without a value is stored as undefined.
%%
%% Every path that stores a parameter goes through store_param/3 so
%% that a duplicate key is always rejected, including when a key
%% without a value is followed by another parameter.
parse_param(<<$;,R/bits>>, Acc, K) ->
	parse_before_param(R, store_param(K, undefined, Acc));
parse_param(<<$=,R0/bits>>, Acc, K) ->
	case parse_bare_item(R0) of
		{Item, <<$;,R/bits>>} ->
			parse_before_param(R, store_param(K, Item, Acc));
		{Item, R} ->
			{store_param(K, Item, Acc), R}
	end;
parse_param(<<C,R/bits>>, Acc, K)
		when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
			or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
	parse_param(R, Acc, <<K/binary,C>>);
parse_param(R, Acc, K) ->
	{store_param(K, undefined, Acc), R}.

%% Add a parameter to the map. Crashes with a badmatch when the key
%% is already present.
store_param(K, V, Acc) ->
	false = maps:is_key(K, Acc),
	Acc#{K => V}.

%% Parse a bare item from the start of the input and return it with
%% the unparsed remainder. The first character selects the type.
%%
%% Integer or float.
%% The second argument of parse_number/3 is the number of digits read
%% so far: the minus sign does not count as a digit.
parse_bare_item(<<$-,R/bits>>) -> parse_number(R, 0, <<$->>);
parse_bare_item(<<C,R/bits>>) when ?IS_DIGIT(C) -> parse_number(R, 1, <<C>>);
%% String.
parse_bare_item(<<$",R/bits>>) -> parse_string(R, <<>>);
%% Token.
parse_bare_item(<<C,R/bits>>) when ?IS_ALPHA(C) -> parse_token(R, <<C>>);
%% Byte sequence.
parse_bare_item(<<$*,R/bits>>) -> parse_binary(R, <<>>);
%% Boolean.
parse_bare_item(<<"?0",R/bits>>) -> {false, R};
parse_bare_item(<<"?1",R/bits>>) -> {true, R}.

%% Read the digits of a number. L is the number of digits read so far
%% and Acc the textual representation. A "." switches to float parsing.
%% An integer is accepted only when it has at most 15 digits.
parse_number(<<C,R/bits>>, L, Acc) when ?IS_DIGIT(C) ->
	parse_number(R, L+1, <<Acc/binary,C>>);
parse_number(<<C,R/bits>>, L, Acc) when C =:= $. ->
	parse_float(R, L, 0, <<Acc/binary,C>>);
parse_number(R, L, Acc) when L =< 15 ->
	{binary_to_integer(Acc), R}.

%% Read the fractional digits of a float. L1 is the number of digits
%% in the integer component and L2 the number of digits read so far in
%% the fractional component. The guard lists the accepted combinations:
%% the more digits in the integer component, the fewer allowed in the
%% fractional component, from 9 and 6 up to 14 and 1.
parse_float(<<C,R/bits>>, L1, L2, Acc) when ?IS_DIGIT(C) ->
	parse_float(R, L1, L2+1, <<Acc/binary,C>>);
parse_float(R, L1, L2, Acc) when
		L1 =< 9, L2 =< 6;
		L1 =< 10, L2 =< 5;
		L1 =< 11, L2 =< 4;
		L1 =< 12, L2 =< 3;
		L1 =< 13, L2 =< 2;
		L1 =< 14, L2 =< 1 ->
	{binary_to_float(Acc), R}.
%% Read the contents of a string; the opening quote was consumed by
%% the caller. Only the double quote and the backslash may be escaped.
%% Accepted unescaped characters are 16#20 to 16#7e, excluding the
%% double quote (which ends the string) and the backslash.
parse_string(<<$\\,Escaped,Rest/bits>>, Acc)
		when Escaped =:= $"; Escaped =:= $\\ ->
	parse_string(Rest, <<Acc/binary,Escaped>>);
parse_string(<<$",Rest/bits>>, Acc) ->
	{{string, Acc}, Rest};
parse_string(<<C,Rest/bits>>, Acc)
		when C >= 16#20, C =< 16#7e, C =/= $", C =/= $\\ ->
	parse_string(Rest, <<Acc/binary,C>>).

%% Read token characters for as long as they are accepted.
%% The token ends at the first character that is not part of it.
parse_token(<<C,Rest/bits>>, Token)
		when ?IS_TOKEN(C) or (C =:= $:) or (C =:= $/) ->
	parse_token(Rest, <<Token/binary,C>>);
parse_token(Rest, Token) ->
	{{token, Token}, Rest}.

%% Read the base64 characters of a byte sequence up to the closing "*"
%% and decode them.
parse_binary(<<$*,Rest/bits>>, Encoded) ->
	{{binary, base64:decode(Encoded)}, Rest};
parse_binary(<<C,Rest/bits>>, Encoded)
		when ?IS_ALPHANUM(C) or (C =:= $+) or (C =:= $/) or (C =:= $=) ->
	parse_binary(Rest, <<Encoded/binary,C>>).

-ifdef(TEST).
%% Generate one test per entry found in the JSON files of the
%% structured-header-tests dependency.
parse_struct_hd_test_() ->
	Files = filelib:wildcard("deps/structured-header-tests/*.json"),
	lists:flatten([parse_struct_hd_file(File) || File <- Files]).

%% Build the tests for a single JSON file. Entries that lack one of
%% the matched keys are skipped by the generator pattern.
parse_struct_hd_file(File) ->
	{ok, JSON} = file:read_file(File),
	Tests = jsx:decode(JSON, [return_maps]),
	Base = filename:basename(File),
	[
		{iolist_to_binary(io_lib:format("~s: ~s", [Base, Name])),
			fun() -> parse_struct_hd_check(Test, HeaderType, Raw0) end}
	|| Test=#{
		<<"name">> := Name,
		<<"header_type">> := HeaderType,
		<<"raw">> := Raw0
	} <- Tests].

%% Run a single test entry.
parse_struct_hd_check(Test, HeaderType, Raw0) ->
	%% The implementation is strict. We fail whenever we can.
	CanFail = maps:get(<<"can_fail">>, Test, false),
	MustFail = maps:get(<<"must_fail">>, Test, false),
	Expected = case MustFail of
		true -> undefined;
		false -> expected_to_term(maps:get(<<"expected">>, Test))
	end,
	Raw = raw_to_binary(Raw0),
	case HeaderType of
		<<"dictionary">> when MustFail; CanFail ->
			{'EXIT', _} = (catch parse_dictionary(Raw));
		%% The test "binary.json: non-zero pad bits" does not fail
		%% due to our reliance on Erlang/OTP's base64 module.
		<<"item">> when CanFail ->
			case (catch parse_item(Raw)) of
				{'EXIT', _} -> ok;
				Expected -> ok
			end;
		<<"item">> when MustFail ->
			{'EXIT', _} = (catch parse_item(Raw));
		<<"list">> when MustFail; CanFail ->
			{'EXIT', _} = (catch parse_list(Raw));
		<<"dictionary">> ->
			{Expected, _Order} = (catch parse_dictionary(Raw));
		<<"item">> ->
			Expected = (catch parse_item(Raw));
		<<"list">> ->
			Expected = (catch parse_list(Raw))
	end.

%% Convert the "expected" value of a test entry to the terms
%% returned by the parsing functions.
%%
%% Item.
expected_to_term([_, Params] = Item) when is_map(Params) ->
	e2t(Item);
%% Outer list.
expected_to_term(Members) when is_list(Members) ->
	lists:map(fun e2t/1, Members);
expected_to_term(Other) ->
	e2t(Other).

%% Dictionary.
e2t(Dict) when is_map(Dict) ->
	maps:map(fun(_, Member) -> e2t(Member) end, Dict);
%% Inner list.
e2t([List, Params]) when is_list(List) ->
	{with_params, lists:map(fun e2t/1, List), e2tp(Params)};
%% Item.
e2t([Bare, Params]) ->
	{with_params, e2tb(Bare), e2tp(Params)}.

%% Parameters.
e2tp(Params) ->
	maps:map(fun(_, Value) -> e2tb(Value) end, Params).

%% Bare item.
e2tb(#{<<"__type">> := <<"token">>, <<"value">> := Value}) ->
	{token, Value};
e2tb(#{<<"__type">> := <<"binary">>, <<"value">> := Value}) ->
	{binary, base32:decode(Value)};
e2tb(Value) when is_binary(Value) ->
	{string, Value};
e2tb(null) ->
	undefined;
e2tb(Value) ->
	Value.

%% The Cowlib parsers currently do not support resuming parsing
%% in the case of multiple headers. To make tests work we modify
%% the raw value the same way Cowboy does when encountering
%% multiple headers: by adding a comma and space in between.
%%
%% Similarly, the Cowlib parsers expect the leading and trailing
%% whitespace to be removed before calling the parser.
raw_to_binary(RawList) ->
	trim_ws(iolist_to_binary(lists:join(<<", ">>, RawList))).

%% Remove the leading whitespace, then the trailing whitespace.
trim_ws(<<C,R/bits>>) when ?IS_WS(C) -> trim_ws(R);
trim_ws(R) -> trim_ws_end(R, byte_size(R) - 1).

%% N is the index of the last byte that has not been examined yet.
%% Reaching -1 means that the value was empty or only whitespace.
trim_ws_end(_, -1) ->
	<<>>;
trim_ws_end(Value, N) ->
	case binary:at(Value, N) of
		$\s -> trim_ws_end(Value, N - 1);
		$\t -> trim_ws_end(Value, N - 1);
		_ ->
			S = N + 1,
			<< Value2:S/binary, _/bits >> = Value,
			Value2
	end.
-endif.

%% Building.

%% Build a dictionary header value.
%%
%% The members can be given as:
%%  - a {Map, Order} tuple as returned by parse_dictionary/1, in which
%%    case the members are written in the order given by Order and a
%%    key of Order that is missing from Map crashes the call;
%%  - a map, in which case the members are written in the order
%%    returned by maps:to_list/1;
%%  - a list of {Key, Value} tuples, written in the order given.
-spec dictionary(#{binary() => sh_item() | sh_inner_list()}
		| sh_dictionary()
		| [{binary(), sh_item() | sh_inner_list()}])
	-> iolist().
dictionary({Map, Order}) when is_map(Map), is_list(Order) ->
	dictionary([{Key, maps:get(Key, Map)} || Key <- Order]);
dictionary(Map) when is_map(Map) ->
	dictionary(maps:to_list(Map));
dictionary(KVList) when is_list(KVList) ->
	lists:join(<<", ">>, [
		[Key, $=, item_or_inner_list(Value)]
	|| {Key, Value} <- KVList]).

%% Build an item header value: the bare item followed by its parameters.
-spec item(sh_item()) -> iolist().
item({with_params, BareItem, Params}) ->
	[bare_item(BareItem), params(Params)].

%% Build a list header value. Members are separated by a comma and a space.
-spec list(sh_list()) -> iolist().
list(List) ->
	lists:join(<<", ">>, [item_or_inner_list(Value) || Value <- List]).

%% A with_params tuple that wraps a list is an inner list;
%% everything else is built as an item.
item_or_inner_list(Value={with_params, List, _}) when is_list(List) ->
	inner_list(Value);
item_or_inner_list(Value) ->
	item(Value).

%% Items of an inner list are separated by a single space.
inner_list({with_params, List, Params}) ->
	[$(, lists:join($\s, [item(Value) || Value <- List]), $), params(Params)].

bare_item({string, String}) ->
	[$", escape_string(String, <<>>), $"];
bare_item({token, Token}) ->
	Token;
bare_item({binary, Binary}) ->
	[$*, base64:encode(Binary), $*];
bare_item(Integer) when is_integer(Integer) ->
	integer_to_binary(Integer);
%% In order to properly reproduce the float as a string we
%% must first determine how many decimals we want in the
%% fractional component, otherwise rounding errors may occur.
%%
%% The number of decimals depends on the number of digits in the
%% integer component. The sign must be ignored when counting them,
%% otherwise large negative floats would always be written with 6
%% decimals, which the parser in this module does not accept for
%% integer components of more than 9 digits.
bare_item(Float) when is_float(Float) ->
	Decimals = case abs(trunc(Float)) of
		I when I >= 10000000000000 -> 1;
		I when I >= 1000000000000 -> 2;
		I when I >= 100000000000 -> 3;
		I when I >= 10000000000 -> 4;
		I when I >= 1000000000 -> 5;
		_ -> 6
	end,
	float_to_binary(Float, [{decimals, Decimals}, compact]);
bare_item(true) ->
	<<"?1">>;
bare_item(false) ->
	<<"?0">>.

%% Escape the backslash and double quote characters of a string.
escape_string(<<>>, Acc) -> Acc;
escape_string(<<$\\,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$\\>>);
escape_string(<<$",R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$">>);
escape_string(<<C,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,C>>).

%% Build the parameters. Each one is prefixed with ";" and a parameter
%% whose value is undefined is written without "=" or a value.
params(Params) ->
	maps:fold(fun
		(Key, undefined, Acc) ->
			[[$;, Key]|Acc];
		(Key, Value, Acc) ->
			[[$;, Key, $=, bare_item(Value)]|Acc]
	end, [], Params).

-ifdef(TEST).
%% For every test entry that has an expected value: build the header
%% from the expected value, parse the result, and check that the
%% expected value is obtained again.
struct_hd_identity_test_() ->
	Files = filelib:wildcard("deps/structured-header-tests/*.json"),
	lists:flatten([begin
		{ok, JSON} = file:read_file(File),
		Tests = jsx:decode(JSON, [return_maps]),
		[
			{iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() ->
				Expected = expected_to_term(Expected0),
				case HeaderType of
					<<"dictionary">> ->
						{Expected, _Order} = parse_dictionary(iolist_to_binary(dictionary(Expected)));
					<<"item">> ->
						Expected = parse_item(iolist_to_binary(item(Expected)));
					<<"list">> ->
						Expected = parse_list(iolist_to_binary(list(Expected)))
				end
			end}
		|| #{
			<<"name">> := Name,
			<<"header_type">> := HeaderType,
			%% We only run tests that must not fail.
			<<"expected">> := Expected0
		} <- Tests]
	end || File <- Files]).
-endif.