zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

link_parser.ex (4635B)


      1 defmodule EarmarkParser.Parser.LinkParser do
      2 
      3   @moduledoc false
      4   import EarmarkParser.Helpers.LeexHelpers, only: [tokenize: 2]
      5   import EarmarkParser.Helpers.YeccHelpers, only: [parse!: 2]
      6   import EarmarkParser.Helpers.StringHelpers, only: [behead: 2]
      7 
      8   # Hopefully this will go away in v1.3
      9   # **********************************
     10   #
     11   # Right now it needs to parse the url part of strings according to the following grammar
     12   #
     13   #      url -> ( inner_url )
     14   #      url -> ( inner_url title )
     15   #
     16   #      inner_url   -> ( inner_url )
     17   #      inner_url   -> [ inner_url ]
     18   #      inner_url   ->  url_char*
     19   #
     20   #      url_char -> . - quote - ( - ) - [ - ]
     21   #
     22   #      title -> quote .* quote  ;;   not LALR-k here
     23   #
     24   #      quote ->  "
     25   #      quote ->  '              ;;  yep allowing '...." for now
     26   #
     27   #      non_quote -> . - quote
     28 
     29   @doc false
     30   def parse_link(src, lnb) do
     31     case parse!(src, lexer: :link_text_lexer, parser: :link_text_parser) do
     32         {link_or_img, link_text, parsed_text} ->
     33          beheaded  = behead(src, to_string(parsed_text))
     34          tokens    = tokenize(beheaded, with: :link_text_lexer)
     35          p_url(tokens, lnb) |> make_result(to_string(link_text), to_string(parsed_text), link_or_img)
     36         _ -> nil
     37     end
     38   end
     39 
     40   defp p_url([{:open_paren, _} | ts], lnb), do: url(ts, {[], [], nil}, [:close_paren], lnb)
     41   defp p_url(_, _), do: nil
     42 
     43   # push one level
     44   defp url([{:open_paren, text} | ts], result, needed, lnb),
     45     do: url(ts, add(result, text), [:close_paren | needed], lnb)
     46 
     47   # pop last level
     48   defp url([{:close_paren, _} | _], result, [:close_paren], _lnb), do: result
     49   # pop inner level
     50   defp url([{:close_paren, text} | ts], result, [:close_paren | needed], lnb),
     51     do: url(ts, add(result, text), needed, lnb)
     52 
     53   # A quote on level 0 -> bailing out if there is a matching quote
     54   defp url(ts_all = [{:open_title, text} | ts], result, [:close_paren], lnb) do
     55     case bail_out_to_title(ts_all, result) do
     56       nil -> url(ts, add(result, text), [:close_paren], lnb)
     57       res -> res
     58     end
     59   end
     60 
     61   # All these are just added to the url
     62   defp url([{:open_bracket, text} | ts], result, needed, lnb),
     63     do: url(ts, add(result, text), needed, lnb)
     64   defp url([{:close_bracket, text} | ts], result, needed, lnb),
     65     do: url(ts, add(result, text), needed, lnb)
     66   defp url([{:any_quote, text} | ts], result, needed, lnb),
     67     do: url(ts, add(result, text), needed, lnb)
     68   defp url([{:verbatim, text} | ts], result, needed, lnb),
     69     do: url(ts, add(result, text), needed, lnb)
     70   defp url([{:ws, text} | ts], result, needed, lnb),
     71     do: url(ts, add(result, text), needed, lnb)
     72   defp url([{:escaped, text} | ts], result, needed, lnb),
     73     do: url(ts, add(result, text), needed, lnb)
     74 
     75   # That is not good, actually this is not a legal url part of a link
     76   defp url(_, _, _, _), do: nil
     77 
     78   defp bail_out_to_title(ts, result) do
     79     with remaining_text <- ts |> Enum.map(&text_of_token/1) |> Enum.join("") do
     80       case title(remaining_text) do
     81         nil                       -> nil
     82         {title_text, inner_title} ->
     83           add_title(result, {title_text, inner_title})
     84       end
     85     end
     86   end
     87 
     88   defp text_of_token(token)
     89   defp text_of_token({:escaped, text}), do: "\\#{text}"
     90   defp text_of_token({_, text}) do
     91     text
     92   end
     93 
     94   # sic!!! Greedy and not context aware, matching '..." and "...' for backward comp
     95   @title_rgx ~r{\A\s+(['"])(.*?)\1(?=\))}
     96   defp title(remaining_text) do
     97     case Regex.run(@title_rgx, remaining_text) do
     98       nil -> nil
     99       [parsed, _, inner] -> {parsed, inner}
    100     end
    101   end
    102 
    103   @wikilink_rgx ~r{\A\[\[([^\]\|]+)(?:\|([^\]]+))?\]\]\Z}
    104   defp make_result(nil, _, parsed_text, :link) do
    105     case Regex.run(@wikilink_rgx, parsed_text) do
    106       nil -> nil
    107       [_, wikilink] -> make_wikilink(parsed_text, wikilink, wikilink)
    108       [_, wikilink, link_text] -> make_wikilink(parsed_text, wikilink, link_text)
    109     end
    110   end
    111 
    112   defp make_result(nil, _, _, _), do: nil
    113 
    114   defp make_result({parsed, url, title}, link_text, parsed_text, link_or_img) do
    115     {"#{parsed_text}(#{list_to_text(parsed)})", link_text, list_to_text(url), title, link_or_img}
    116   end
    117 
    118   defp add({parsed_text, url_text, nil}, text), do: {[text | parsed_text], [text | url_text], nil}
    119 
    120   defp add_title({parsed_text, url_text, _}, {parsed, inner}),
    121     do: {[parsed | parsed_text], url_text, inner}
    122 
    123   defp make_wikilink(parsed_text, target, link_text) do
    124     {parsed_text, String.trim(link_text), String.trim(target), nil, :wikilink}
    125   end
    126 
    127   defp list_to_text(lst), do: lst |> Enum.reverse() |> Enum.join("")
    128 end
    129 
    130 # SPDX-License-Identifier: Apache-2.0