link_parser.ex (4635B)
defmodule EarmarkParser.Parser.LinkParser do
  @moduledoc false

  import EarmarkParser.Helpers.LeexHelpers, only: [tokenize: 2]
  import EarmarkParser.Helpers.YeccHelpers, only: [parse!: 2]
  import EarmarkParser.Helpers.StringHelpers, only: [behead: 2]

  # Hopefully this will go away in v1.3
  # **********************************
  #
  # Right now it needs to parse the url part of strings according to the following grammar
  #
  #      url -> ( inner_url )
  #      url -> ( inner_url title )
  #
  #      inner_url   -> ( inner_url )
  #      inner_url   -> [ inner_url ]
  #      inner_url   -> url_char*
  #
  #      url_char -> . - quote - ( - ) - [ - ]
  #
  #      title -> quote .* quote  ;;   not LALR-k here
  #
  #      quote -> "
  #      quote -> '              ;;  yep allowing '...." for now
  #
  #      non_quote -> . - quote

  # Token kinds that are copied verbatim into the url at any nesting level.
  @passthrough_tokens [:open_bracket, :close_bracket, :any_quote, :verbatim, :ws, :escaped]

  # Title matcher applied to the text remaining from an `:open_title` token onwards.
  # Not context aware: after mandatory leading whitespace it takes the first quote
  # and lazily extends to the nearest *identical* quote that is directly followed by
  # `)`. Quotes of either kind may therefore appear inside the captured title.
  @title_rgx ~r{\A\s+(['"])(.*?)\1(?=\))}

  # `[[target]]` or `[[target|text]]`, anchored to the whole parsed text.
  @wikilink_rgx ~r{\A\[\[([^\]\|]+)(?:\|([^\]]+))?\]\]\Z}

  # Entry point.
  #
  # The text part of the link is parsed with the `:link_text_lexer` /
  # `:link_text_parser` pair. When that yields a
  # `{link_or_img, link_text, parsed_text}` triple, the rest of `src`
  # (presumably `src` with `parsed_text` stripped from its front by `behead/2` —
  # confirm against StringHelpers) is tokenized and scanned as the parenthesised
  # url part.
  #
  # Returns `{matched_text, link_text, url, title, kind}` where `kind` is
  # `link_or_img` as reported by the parser, or `:wikilink` when no url part was
  # found but the text itself has wikilink shape. Returns `nil` otherwise.
  #
  # `lnb` is threaded through the url scanner but no clause in this module reads it.
  @doc false
  def parse_link(src, lnb) do
    case parse!(src, lexer: :link_text_lexer, parser: :link_text_parser) do
      {link_or_img, link_text, parsed_text} ->
        parsed_text = to_string(parsed_text)

        src
        |> behead(parsed_text)
        |> tokenize(with: :link_text_lexer)
        |> p_url(lnb)
        |> make_result(to_string(link_text), parsed_text, link_or_img)

      _ ->
        nil
    end
  end

  # The url part must start with an opening parenthesis. The accumulator is
  # `{parsed_parts, url_parts, title}`, both lists kept in reverse order.
  defp p_url([{:open_paren, _} | ts], lnb), do: url(ts, {[], [], nil}, [:close_paren], lnb)
  defp p_url(_, _), do: nil

  # `needed` is a stack of the closing parens still expected; `[:close_paren]`
  # means we are at the outermost level of the url.

  # push one level
  defp url([{:open_paren, text} | ts], result, needed, lnb),
    do: url(ts, add(result, text), [:close_paren | needed], lnb)

  # pop last level: the outermost `)` ends the url and is not added to the result
  defp url([{:close_paren, _} | _], result, [:close_paren], _lnb), do: result

  # pop inner level
  defp url([{:close_paren, text} | ts], result, [:close_paren | needed], lnb),
    do: url(ts, add(result, text), needed, lnb)

  # A quote on level 0 -> bailing out if there is a matching quote,
  # otherwise the token is just part of the url.
  defp url([{:open_title, text} | ts] = tokens, result, [:close_paren], lnb) do
    case bail_out_to_title(tokens, result) do
      nil -> url(ts, add(result, text), [:close_paren], lnb)
      with_title -> with_title
    end
  end

  # All these are just added to the url
  defp url([{kind, text} | ts], result, needed, lnb) when kind in @passthrough_tokens,
    do: url(ts, add(result, text), needed, lnb)

  # That is not good, actually this is not a legal url part of a link
  # (this also covers running out of tokens and an `:open_title` inside nested parens)
  defp url(_, _, _, _), do: nil

  # Re-assembles the source text of all remaining tokens and tries to read a
  # title from its start. Returns the accumulator with the title attached,
  # or `nil` when no title matches.
  defp bail_out_to_title(ts, result) do
    remaining_text = Enum.map_join(ts, "", &text_of_token/1)

    case title(remaining_text) do
      nil -> nil
      title_match -> add_title(result, title_match)
    end
  end

  # Source text of a token; `:escaped` tokens get a backslash prepended again.
  defp text_of_token({:escaped, text}), do: "\\#{text}"
  defp text_of_token({_, text}), do: text

  # Returns `{whole_match, inner_title}` or `nil`.
  defp title(remaining_text) do
    case Regex.run(@title_rgx, remaining_text) do
      nil -> nil
      [parsed, _, inner] -> {parsed, inner}
    end
  end

  # No url part was found: a `:link` may still be a wikilink.
  defp make_result(nil, _, parsed_text, :link) do
    case Regex.run(@wikilink_rgx, parsed_text) do
      nil -> nil
      # no `|text` part: the target doubles as the link text
      [_, wikilink] -> make_wikilink(parsed_text, wikilink, wikilink)
      [_, wikilink, link_text] -> make_wikilink(parsed_text, wikilink, link_text)
    end
  end

  defp make_result(nil, _, _, _), do: nil

  # Re-adds the surrounding parentheses, which the scanner does not accumulate.
  defp make_result({parsed, url, title}, link_text, parsed_text, link_or_img) do
    {"#{parsed_text}(#{list_to_text(parsed)})", link_text, list_to_text(url), title, link_or_img}
  end

  # Prepends `text` to both the parsed text and the url; only valid while no
  # title has been set.
  defp add({parsed_text, url_text, nil}, text), do: {[text | parsed_text], [text | url_text], nil}

  # The title's full source goes into the parsed text only; the url is left alone.
  defp add_title({parsed_text, url_text, _}, {parsed, inner}),
    do: {[parsed | parsed_text], url_text, inner}

  defp make_wikilink(parsed_text, target, link_text) do
    {parsed_text, String.trim(link_text), String.trim(target), nil, :wikilink}
  end

  # The accumulators are built by prepending, so reverse before joining.
  defp list_to_text(lst), do: lst |> Enum.reverse() |> Enum.join("")
end

# SPDX-License-Identifier: Apache-2.0