cow_link.erl (15516B)
%% Copyright (c) 2019, Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

-module(cow_link).
-compile({no_auto_import, [link/1]}).

-export([parse_link/1]).
-export([resolve_link/2]).
-export([resolve_link/3]).
-export([link/1]).

-include("cow_inline.hrl").
-include("cow_parse.hrl").

-type link() :: #{
    target := binary(),
    rel := binary(),
    attributes := [{binary(), binary()}]
}.
-export_type([link/0]).

-type resolve_opts() :: #{
    allow_anchor => boolean()
}.

-type uri() :: uri_string:uri_map() | uri_string:uri_string() | undefined.

%% Parse a link header.

%% The target of every link is returned exactly as it appears between
%% the angle brackets of the header. Relative targets are NOT resolved
%% here; callers must resolve them as per RFC3986 5. Resolution is not
%% always possible, for example when the link header is returned with
%% a 404 status code.
-spec parse_link(binary()) -> [link()].
parse_link(Link) ->
    before_target(Link, []).

%% Position the parser on the next link-value.
%%
%% Links holds the links parsed so far, most recent first; it is put
%% back in header order once the input is exhausted. Whitespace ahead
%% of the opening "<" is skipped. Any other byte matches no clause and
%% the call fails with function_clause.
before_target(<<C,Rest/bits>>, Links) when ?IS_WS(C) ->
    before_target(Rest, Links);
before_target(<<$<,Rest/bits>>, Links) ->
    target(Rest, Links, <<>>);
before_target(<<>>, Links) ->
    lists:reverse(Links).

%% Accumulate the bytes of the URI-Reference into T until the
%% closing ">" is found, then move on to the link parameters.
target(<<$>,R/bits>>, Acc, T) -> param_sep(R, Acc, T, []);
target(<<C,R/bits>>, Acc, T) -> target(R, Acc, <<T/binary, C>>).

%% Decide what follows a target or a complete parameter:
%% end of input (done), "," (next link) or ";" (next parameter).
%% Whitespace is skipped. P is the list of parameters of the
%% current link, most recent first.
param_sep(<<>>, Acc, T, P) -> lists:reverse(acc_link(Acc, T, P));
param_sep(<<$,,R/bits>>, Acc, T, P) -> before_target(R, acc_link(Acc, T, P));
param_sep(<<$;,R/bits>>, Acc, T, P) -> before_param(R, Acc, T, P);
param_sep(<<C,R/bits>>, Acc, T, P) when ?IS_WS(C) -> param_sep(R, Acc, T, P).

%% Skip whitespace after ";" and start reading the parameter name.
%%
%% NOTE(review): ?LOWER comes from an included header; it presumably
%% reads the matched byte through the variable named C, so that name
%% should be kept in the clauses using it -- confirm in cow_inline.hrl.
before_param(<<C,R/bits>>, Acc, T, P) when ?IS_WS(C) -> before_param(R, Acc, T, P);
before_param(<<C,R/bits>>, Acc, T, P) when ?IS_TOKEN(C) -> ?LOWER(param, R, Acc, T, P, <<>>).

%% Read the parameter name into K. The tests below expect names to
%% come out lowercase.
%%
%% The name ends either with "=" followed by a quoted-string or a
%% token, or directly with whatever param_sep/4 accepts. The latter
%% is a parameter without a value (for example "nopush"): RFC8288 3
%% makes the value optional and RFC8288 B.3 gives it the empty string.
%% A lone "=" with nothing usable after it still fails in param_sep/4.
param(<<$=,$",R/bits>>, Acc, T, P, K) -> quoted(R, Acc, T, P, K, <<>>);
param(<<$=,C,R/bits>>, Acc, T, P, K) when ?IS_TOKEN(C) -> value(R, Acc, T, P, K, <<C>>);
param(<<C,R/bits>>, Acc, T, P, K) when ?IS_TOKEN(C) -> ?LOWER(param, R, Acc, T, P, K);
param(R, Acc, T, P, K) -> param_sep(R, Acc, T, [{K, <<>>}|P]).

%% Read a quoted-string value into V up to the closing double quote.
%% A backslash escapes the byte that follows it; only the escaped
%% byte is kept.
quoted(<<$",R/bits>>, Acc, T, P, K, V) -> param_sep(R, Acc, T, [{K, V}|P]);
quoted(<<$\\,C,R/bits>>, Acc, T, P, K, V) when ?IS_VCHAR_OBS(C) -> quoted(R, Acc, T, P, K, <<V/binary,C>>);
quoted(<<C,R/bits>>, Acc, T, P, K, V) when ?IS_VCHAR_OBS(C) -> quoted(R, Acc, T, P, K, <<V/binary,C>>).

%% Read a token value into V. The first byte that is not a token
%% character (or the end of input) ends the value.
value(<<C,R/bits>>, Acc, T, P, K, V) when ?IS_TOKEN(C) -> value(R, Acc, T, P, K, <<V/binary,C>>);
value(R, Acc, T, P, K, V) -> param_sep(R, Acc, T, [{K, V}|P]).

%% Build the link map for Target and prepend it to Acc.
%%
%% Params0 is in reverse header order. The first "rel" parameter is
%% extracted into its own key; the call fails with badmatch when
%% there is none.
acc_link(Acc, Target, Params0) ->
    Params1 = lists:reverse(Params0),
    %% The rel parameter MUST be present. (RFC8288 3.3)
    {value, {_, Rel}, Params2} = lists:keytake(<<"rel">>, 1, Params1),
    %% Occurrences after the first MUST be ignored by parsers.
    Params = filter_out_duplicates(Params2, #{}),
    [#{
        target => Target,
        rel => ?LOWER(Rel),
        attributes => Params
    }|Acc].

%% This function removes duplicates for attributes that don't allow them.
%%
%% State records which of "anchor", "media", "title", "title*" and
%% "type" have already been kept; only their first occurrence
%% survives. All other attributes are kept as is, in order.
filter_out_duplicates([], _) ->
    [];
%% The "rel" is mandatory and was already removed from params.
filter_out_duplicates([{<<"rel">>, _}|Tail], State) ->
    filter_out_duplicates(Tail, State);
filter_out_duplicates([{<<"anchor">>, _}|Tail], State=#{anchor := true}) ->
    filter_out_duplicates(Tail, State);
filter_out_duplicates([{<<"media">>, _}|Tail], State=#{media := true}) ->
    filter_out_duplicates(Tail, State);
filter_out_duplicates([{<<"title">>, _}|Tail], State=#{title := true}) ->
    filter_out_duplicates(Tail, State);
filter_out_duplicates([{<<"title*">>, _}|Tail], State=#{title_star := true}) ->
    filter_out_duplicates(Tail, State);
filter_out_duplicates([{<<"type">>, _}|Tail], State=#{type := true}) ->
    filter_out_duplicates(Tail, State);
filter_out_duplicates([Tuple={<<"anchor">>, _}|Tail], State) ->
    [Tuple|filter_out_duplicates(Tail, State#{anchor => true})];
filter_out_duplicates([Tuple={<<"media">>, _}|Tail], State) ->
    [Tuple|filter_out_duplicates(Tail, State#{media => true})];
filter_out_duplicates([Tuple={<<"title">>, _}|Tail], State) ->
    [Tuple|filter_out_duplicates(Tail, State#{title => true})];
filter_out_duplicates([Tuple={<<"title*">>, _}|Tail], State) ->
    [Tuple|filter_out_duplicates(Tail, State#{title_star => true})];
filter_out_duplicates([Tuple={<<"type">>, _}|Tail], State) ->
    [Tuple|filter_out_duplicates(Tail, State#{type => true})];
filter_out_duplicates([Tuple|Tail], State) ->
    [Tuple|filter_out_duplicates(Tail, State)].

-ifdef(TEST).
parse_link_test_() ->
    Tests = [
        {<<>>, []},
        {<<" ">>, []},
        %% Examples from the RFC.
        {<<"<http://example.com/TheBook/chapter2>; rel=\"previous\"; title=\"previous chapter\"">>, [
            #{
                target => <<"http://example.com/TheBook/chapter2">>,
                rel => <<"previous">>,
                attributes => [
                    {<<"title">>, <<"previous chapter">>}
                ]
            }
        ]},
        {<<"</>; rel=\"http://example.net/foo\"">>, [
            #{
                target => <<"/">>,
                rel => <<"http://example.net/foo">>,
                attributes => []
            }
        ]},
        {<<"</terms>; rel=\"copyright\"; anchor=\"#foo\"">>, [
            #{
                target => <<"/terms">>,
                rel => <<"copyright">>,
                attributes => [
                    {<<"anchor">>, <<"#foo">>}
                ]
            }
        ]},
%       {<<"</TheBook/chapter2>; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, "
%           "</TheBook/chapter4>; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel">>, [
%           %% @todo
%       ]}
        {<<"<http://example.org/>; rel=\"start http://example.net/relation/other\"">>, [
            #{
                target => <<"http://example.org/">>,
                rel => <<"start http://example.net/relation/other">>,
                attributes => []
            }
        ]},
        {<<"<https://example.org/>; rel=\"start\", "
            "<https://example.org/index>; rel=\"index\"">>, [
            #{
                target => <<"https://example.org/">>,
                rel => <<"start">>,
                attributes => []
            },
            #{
                target => <<"https://example.org/index">>,
                rel => <<"index">>,
                attributes => []
            }
        ]},
        %% Relation types are case insensitive.
        {<<"</>; rel=\"SELF\"">>, [
            #{
                target => <<"/">>,
                rel => <<"self">>,
                attributes => []
            }
        ]},
        {<<"</>; rel=\"HTTP://EXAMPLE.NET/FOO\"">>, [
            #{
                target => <<"/">>,
                rel => <<"http://example.net/foo">>,
                attributes => []
            }
        ]},
        %% Attribute names are case insensitive.
        {<<"</terms>; REL=\"copyright\"; ANCHOR=\"#foo\"">>, [
            #{
                target => <<"/terms">>,
                rel => <<"copyright">>,
                attributes => [
                    {<<"anchor">>, <<"#foo">>}
                ]
            }
        ]},
        %% Attributes may omit the value; it is then the empty string.
        {<<"</app.js>; rel=preload; as=script; nopush">>, [
            #{
                target => <<"/app.js">>,
                rel => <<"preload">>,
                attributes => [
                    {<<"as">>, <<"script">>},
                    {<<"nopush">>, <<>>}
                ]
            }
        ]},
        {<<"</a>; nopush; rel=preload, </b>; rel=next">>, [
            #{
                target => <<"/a">>,
                rel => <<"preload">>,
                attributes => [
                    {<<"nopush">>, <<>>}
                ]
            },
            #{
                target => <<"/b">>,
                rel => <<"next">>,
                attributes => []
            }
        ]}
    ],
    [{V, fun() -> R = parse_link(V) end} || {V, R} <- Tests].
-endif.

%% Resolve a link based on the context URI and options.

-spec resolve_link(Link, uri()) -> Link | false when Link::link().
resolve_link(Link, ContextURI) ->
    resolve_link(Link, ContextURI, #{}).

-spec resolve_link(Link, uri(), resolve_opts()) -> Link | false when Link::link().
%% Without a context URI the link is only usable when its target is
%% already absolute; the target is then merely normalized.
%%
%% With a context URI, an "anchor" attribute (when present) is first
%% resolved against the context and becomes the base for the target.
%% Links carrying an anchor are rejected with false unless the
%% allow_anchor option is true, which is its default.
resolve_link(Link=#{target := TargetURI}, undefined, _) ->
    normalize_absolute(Link, uri_string:parse(TargetURI));
resolve_link(Link=#{attributes := Params}, ContextURI, Opts) ->
    AllowAnchor = maps:get(allow_anchor, Opts, true),
    AnchorParam = lists:keyfind(<<"anchor">>, 1, Params),
    resolve_anchored(AnchorParam, AllowAnchor, Link, ContextURI).

%% Keep the link only when the parsed target has a scheme.
normalize_absolute(Link, Parsed=#{scheme := _}) ->
    Link#{target => uri_string:normalize(Parsed)};
normalize_absolute(_, _) ->
    false.

%% Dispatch on the result of the anchor lookup and the allow_anchor flag.
resolve_anchored(false, _, Link, ContextURI) ->
    do_resolve_link(Link, ContextURI);
resolve_anchored({_, Anchor}, true, Link, ContextURI) ->
    do_resolve_link(Link, resolve(Anchor, ContextURI));
resolve_anchored(_, _, _, _) ->
    false.

%% Replace the target of the link by its resolved, recomposed form.
do_resolve_link(Link=#{target := TargetURI}, ContextURI) ->
    Resolved = resolve(TargetURI, ContextURI),
    Link#{target => uri_string:recompose(Resolved)}.

-ifdef(TEST).
resolve_link_test_() ->
    Tests = [
        %% No context URI available.
        {#{target => <<"http://a/b/./c">>}, undefined, #{},
            #{target => <<"http://a/b/c">>}},
        {#{target => <<"a/b/./c">>}, undefined, #{},
            false},
        %% Context URI available, allow_anchor => true.
        {#{target => <<"http://a/b">>, attributes => []}, <<"http://a/c">>, #{},
            #{target => <<"http://a/b">>, attributes => []}},
        {#{target => <<"b">>, attributes => []}, <<"http://a/c">>, #{},
            #{target => <<"http://a/b">>, attributes => []}},
        {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"#frag">>}]}, <<"http://a/c">>, #{},
            #{target => <<"http://a/b">>, attributes => [{<<"anchor">>, <<"#frag">>}]}},
        {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"d/e">>}]}, <<"http://a/c">>, #{},
            #{target => <<"http://a/d/b">>, attributes => [{<<"anchor">>, <<"d/e">>}]}},
        %% Context URI available, allow_anchor => false.
        {#{target => <<"http://a/b">>, attributes => []}, <<"http://a/c">>, #{allow_anchor => false},
            #{target => <<"http://a/b">>, attributes => []}},
        {#{target => <<"b">>, attributes => []}, <<"http://a/c">>, #{allow_anchor => false},
            #{target => <<"http://a/b">>, attributes => []}},
        {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"#frag">>}]},
            <<"http://a/c">>, #{allow_anchor => false}, false},
        {#{target => <<"b">>, attributes => [{<<"anchor">>, <<"d/e">>}]},
            <<"http://a/c">>, #{allow_anchor => false}, false}
    ],
    [{iolist_to_binary(io_lib:format("~0p", [L])),
        fun() -> R = resolve_link(L, C, O) end} || {L, C, O, R} <- Tests].
-endif.

%% @todo This function has been added to Erlang/OTP 22.3 as uri_string:resolve/2,3.
%%
%% Resolve URI against BaseURI and return the result as a URI map
%% whose path has its dot segments removed.
resolve(URI, BaseURI) ->
    drop_dot_segments(resolve1(ensure_map_uri(URI), BaseURI)).

%% Only the path goes through normalization. Normalizing the entire
%% URI will sometimes add an extra slash we don't want.
drop_dot_segments(Resolved=#{path := RawPath}) ->
    #{path := CleanPath} = uri_string:normalize(#{path => RawPath}, [return_map]),
    Resolved#{path => CleanPath};
drop_dot_segments(Resolved) ->
    Resolved.

%% Combine a parsed reference with the base URI (RFC3986 5.2.2),
%% without removing dot segments.
%%
%% URI must be a URI map; BaseURI may be a URI map or anything
%% accepted by ensure_map_uri/1. The clauses are, in order:
%%  - the reference has a scheme: it is returned unchanged;
%%  - the reference has an authority: only the scheme is inherited;
%%  - the reference has an empty path: the base authority and path
%%    are inherited, and the base query too unless the reference
%%    has its own;
%%  - the reference path is absolute: the base authority is inherited;
%%  - otherwise the reference path is merged with the base path.
%%
%% The inherited authority includes the userinfo component, because
%% RFC3986 inherits the authority as a whole.
resolve1(URI=#{scheme := _}, _) ->
    URI;
resolve1(URI=#{host := _}, BaseURI) ->
    #{scheme := Scheme} = ensure_map_uri(BaseURI),
    URI#{scheme => Scheme};
resolve1(URI=#{path := <<>>}, BaseURI0) ->
    BaseURI = ensure_map_uri(BaseURI0),
    Keys = case maps:is_key(query, URI) of
        true -> [scheme, userinfo, host, port, path];
        false -> [scheme, userinfo, host, port, path, query]
    end,
    maps:merge(URI, maps:with(Keys, BaseURI));
resolve1(URI=#{path := <<"/",_/bits>>}, BaseURI0) ->
    BaseURI = ensure_map_uri(BaseURI0),
    maps:merge(URI, maps:with([scheme, userinfo, host, port], BaseURI));
resolve1(URI=#{path := Path}, BaseURI0) ->
    BaseURI = ensure_map_uri(BaseURI0),
    maps:merge(
        URI#{path := merge_paths(Path, BaseURI)},
        maps:with([scheme, userinfo, host, port], BaseURI)).

%% Merge a relative reference path with the path of the base URI.
%%
%% When the base has an authority and an empty path the result is
%% "/" followed by Path. Otherwise everything after the last "/" of
%% the base path is replaced by Path; a base path without any "/"
%% also results in "/" followed by Path.
merge_paths(Path, #{host := _, path := <<>>}) ->
    <<$/, Path/binary>>;
merge_paths(Path, #{path := BasePath0}) ->
    case string:split(BasePath0, <<$/>>, trailing) of
        [BasePath, _] -> <<BasePath/binary, $/, Path/binary>>;
        [_] -> <<$/, Path/binary>>
    end.

%% Return URI as a URI map. Maps are returned unchanged; any other
%% value is treated as iodata and parsed with uri_string:parse/1.
ensure_map_uri(URI) when is_map(URI) -> URI;
ensure_map_uri(URI) -> uri_string:parse(iolist_to_binary(URI)).

-ifdef(TEST).
resolve_test_() ->
    Tests = [
        %% 5.4.1. Normal Examples
        {<<"g:h">>, <<"g:h">>},
        {<<"g">>, <<"http://a/b/c/g">>},
        {<<"./g">>, <<"http://a/b/c/g">>},
        {<<"g/">>, <<"http://a/b/c/g/">>},
        {<<"/g">>, <<"http://a/g">>},
        {<<"//g">>, <<"http://g">>},
        {<<"?y">>, <<"http://a/b/c/d;p?y">>},
        {<<"g?y">>, <<"http://a/b/c/g?y">>},
        {<<"#s">>, <<"http://a/b/c/d;p?q#s">>},
        {<<"g#s">>, <<"http://a/b/c/g#s">>},
        {<<"g?y#s">>, <<"http://a/b/c/g?y#s">>},
        {<<";x">>, <<"http://a/b/c/;x">>},
        {<<"g;x">>, <<"http://a/b/c/g;x">>},
        {<<"g;x?y#s">>, <<"http://a/b/c/g;x?y#s">>},
        {<<"">>, <<"http://a/b/c/d;p?q">>},
        {<<".">>, <<"http://a/b/c/">>},
        {<<"./">>, <<"http://a/b/c/">>},
        {<<"..">>, <<"http://a/b/">>},
        {<<"../">>, <<"http://a/b/">>},
        {<<"../g">>, <<"http://a/b/g">>},
        {<<"../..">>, <<"http://a/">>},
        {<<"../../">>, <<"http://a/">>},
        {<<"../../g">>, <<"http://a/g">>},
        %% 5.4.2. Abnormal Examples
        {<<"../../../g">>, <<"http://a/g">>},
        {<<"../../../../g">>, <<"http://a/g">>},
        {<<"/./g">>, <<"http://a/g">>},
        {<<"/../g">>, <<"http://a/g">>},
        {<<"g.">>, <<"http://a/b/c/g.">>},
        {<<".g">>, <<"http://a/b/c/.g">>},
        {<<"g..">>, <<"http://a/b/c/g..">>},
        {<<"..g">>, <<"http://a/b/c/..g">>},
        {<<"./../g">>, <<"http://a/b/g">>},
        {<<"./g/.">>, <<"http://a/b/c/g/">>},
        {<<"g/./h">>, <<"http://a/b/c/g/h">>},
        {<<"g/../h">>, <<"http://a/b/c/h">>},
        {<<"g;x=1/./y">>, <<"http://a/b/c/g;x=1/y">>},
        {<<"g;x=1/../y">>, <<"http://a/b/c/y">>},
        {<<"g?y/./x">>, <<"http://a/b/c/g?y/./x">>},
        {<<"g?y/../x">>, <<"http://a/b/c/g?y/../x">>},
        {<<"g#s/./x">>, <<"http://a/b/c/g#s/./x">>},
        {<<"g#s/../x">>, <<"http://a/b/c/g#s/../x">>},
        {<<"http:g">>, <<"http:g">>} %% for strict parsers
    ],
    [{V, fun() -> R = uri_string:recompose(resolve(V, <<"http://a/b/c/d;p?q">>)) end} || {V, R} <- Tests].

%% The userinfo component is part of the inherited authority.
resolve_userinfo_test_() ->
    Tests = [
        {<<"g">>, <<"http://user@a/b/g">>},
        {<<"/g">>, <<"http://user@a/g">>},
        {<<"?y">>, <<"http://user@a/b/c?y">>},
        {<<"//g">>, <<"http://g">>}
    ],
    [{V, fun() -> R = uri_string:recompose(resolve(V, <<"http://user@a/b/c">>)) end} || {V, R} <- Tests].
-endif.

%% Build a link header.

-spec link([#{
    target := binary(),
    rel := binary(),
    attributes := [{binary(), binary()}]
}]) -> iodata().
%% Render the links, in order, separated by ", ". The result is
%% iodata; an empty list of links gives an empty result.
link(Links) ->
    Rendered = [do_link(L) || L <- Links],
    lists:join(<<", ">>, Rendered).

%% Render one link as: the target between angle brackets, the quoted
%% rel, then every attribute in order. The target and rel are
%% written as given, without escaping.
do_link(#{target := TargetURI, rel := Rel, attributes := Params}) ->
    Attributes = [attribute(Name, Value) || {Name, Value} <- Params],
    [$<, TargetURI, <<">; rel=\"">>, Rel, $", Attributes].

%% Render one attribute as: ; name="escaped value"
attribute(Name, Value) ->
    [<<"; ">>, Name, <<"=\"">>, escape(iolist_to_binary(Value)), $"].

%% Prefix every backslash and double quote with a backslash so the
%% value can be placed inside a quoted-string.
escape(Value) ->
    << <<(escape_byte(Byte))/binary>> || <<Byte>> <= Value >>.

escape_byte($\\) -> <<$\\,$\\>>;
escape_byte($") -> <<$\\,$">>;
escape_byte(Byte) -> <<Byte>>.

-ifdef(TEST).
link_test_() ->
    Tests = [
        {<<>>, []},
        %% Examples from the RFC.
        {<<"<http://example.com/TheBook/chapter2>; rel=\"previous\"; title=\"previous chapter\"">>, [
            #{
                target => <<"http://example.com/TheBook/chapter2">>,
                rel => <<"previous">>,
                attributes => [
                    {<<"title">>, <<"previous chapter">>}
                ]
            }
        ]},
        {<<"</>; rel=\"http://example.net/foo\"">>, [
            #{
                target => <<"/">>,
                rel => <<"http://example.net/foo">>,
                attributes => []
            }
        ]},
        {<<"</terms>; rel=\"copyright\"; anchor=\"#foo\"">>, [
            #{
                target => <<"/terms">>,
                rel => <<"copyright">>,
                attributes => [
                    {<<"anchor">>, <<"#foo">>}
                ]
            }
        ]},
%       {<<"</TheBook/chapter2>; rel=\"previous\"; title*=UTF-8'de'letztes%20Kapitel, "
%           "</TheBook/chapter4>; rel=\"next\"; title*=UTF-8'de'n%c3%a4chstes%20Kapitel">>, [
%           %% @todo
%       ]}
        {<<"<http://example.org/>; rel=\"start http://example.net/relation/other\"">>, [
            #{
                target => <<"http://example.org/">>,
                rel => <<"start http://example.net/relation/other">>,
                attributes => []
            }
        ]},
        {<<"<https://example.org/>; rel=\"start\", "
            "<https://example.org/index>; rel=\"index\"">>, [
            #{
                target => <<"https://example.org/">>,
                rel => <<"start">>,
                attributes => []
            },
            #{
                target => <<"https://example.org/index">>,
                rel => <<"index">>,
                attributes => []
            }
        ]},
        {<<"</>; rel=\"previous\"; quoted=\"name=\\\"value\\\"\"">>, [
            #{
                target => <<"/">>,
                rel => <<"previous">>,
                attributes => [
                    {<<"quoted">>, <<"name=\"value\"">>}
                ]
            }
        ]}
    ],
    [{iolist_to_binary(io_lib:format("~0p", [V])),
        fun() -> R = iolist_to_binary(link(V)) end} || {R, V} <- Tests].
-endif.