zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

line_scanner.ex (9567B)


      1 defmodule EarmarkParser.LineScanner do
      2   @moduledoc false
      3 
      4   alias EarmarkParser.{Helpers, Line, Options}
      5 
  # Regexes for link reference definitions, i.e. the "[id]: url title" syntax.
  #
  # `@id_title_part` is the optional title. It may be wrapped in double quotes,
  # single quotes or parentheses; the branch-reset group `(?|` lets all three
  # alternatives fill the same capture. It is a raw string (~S) because it is
  # interpolated into `@id_re`, which is compiled in extended (`x`) mode.

  @id_title_part ~S"""
        (?|
             " (.*)  "         # in quotes
          |  ' (.*)  '         #
          | \( (.*) \)         # in parens
        )
  """

  # Matches a whole definition line and captures the id, the url (with or
  # without surrounding `<>`) and, when present, the title.
  # The id must not start with `^` -- presumably so that footnote definitions
  # (`[^id]: ...`) are not taken for link definitions.
  @id_re ~r'''
     ^\[([^^].*?)\]:            # [someid]:
     \s+
     (?|
         < (\S+) >          # url in <>s
       |   (\S+)            # or without
     )
     (?:
        \s+                   # optional title
        #{@id_title_part}
     )?
     \s*
  $
  '''x
     30 
  # Splits an indented line into three captures: the leading whitespace that
  # comes in complete groups of four characters, any leading whitespace left
  # over after those groups, and the rest of the line.
  @indent_re ~r'''
    \A ( (?: \s{4})+ ) (\s*)                       # 4 or more leading spaces
    (.*)                                  # the rest
  '''x
     35 
     36   @void_tags ~w{area br hr img wbr}
     37   @void_tag_rgx ~r'''
     38       ^<( #{Enum.join(@void_tags, "|")} )
     39         .*?
     40         >
     41   '''x
     42   @doc false
     43   def void_tag?(tag), do: Regex.match?(@void_tag_rgx, "<#{tag}>")
     44 
     45   def scan_lines(lines, options, recursive) do
     46     _lines_with_count(lines, options.line - 1)
     47     |> _with_lookahead(options, recursive)
     48   end
     49 
     50   def type_of(line, recursive)
     51       when is_boolean(recursive),
     52       do: type_of(line, %Options{}, recursive)
     53 
     54   def type_of({line, lnb}, options = %Options{annotations: annotations}, recursive) do
     55     {line1, annotation} = line |> Helpers.expand_tabs() |> Helpers.remove_line_ending(annotations)
     56     %{_type_of(line1, options, recursive) | annotation: annotation, lnb: lnb}
     57   end
     58 
     59   defp _type_of(line, options = %Options{}, recursive) do
     60     {ial, stripped_line} = Helpers.extract_ial(line)
     61     {content, indent} = _count_indent(line, 0)
     62     lt_four? = indent < 4
     63 
     64     cond do
     65       content == "" ->
     66         _create_text(line, content, indent)
     67 
     68       lt_four? && !recursive && Regex.run(~r/\A <! (?: -- .*? -- \s* )+ > \z/x, content) ->
     69         %Line.HtmlComment{complete: true, indent: indent, line: line}
     70 
     71       lt_four? && !recursive && Regex.run(~r/\A <!-- .*? \z/x, content) ->
     72         %Line.HtmlComment{complete: false, indent: indent, line: line}
     73 
     74       lt_four? && Regex.run(~r/^ (?:-\s?){3,} $/x, content) ->
     75         %Line.Ruler{type: "-", indent: indent, line: line}
     76 
     77       lt_four? && Regex.run(~r/^ (?:\*\s?){3,} $/x, content) ->
     78         %Line.Ruler{type: "*", indent: indent, line: line}
     79 
     80       lt_four? && Regex.run(~r/\A (?:_\s?){3,} \z/x, content) ->
     81         %Line.Ruler{type: "_", indent: indent, line: line}
     82 
     83       match = Regex.run(~R/^(#{1,6})\s+(?|([^#]+)#*\s*$|(.*))/u, stripped_line) ->
     84         [_, level, heading] = match
     85 
     86         %Line.Heading{
     87           level: String.length(level),
     88           content: String.trim(heading),
     89           indent: 0,
     90           ial: ial,
     91           line: line
     92         }
     93 
     94       match = lt_four? && Regex.run(~r/\A>\s?(.*)/, content) ->
     95         [_, quote] = match
     96         %Line.BlockQuote{content: quote, indent: indent, ial: ial, line: line}
     97 
     98       match = Regex.run(@indent_re, line) ->
     99         [_, spaces, more_spaces, rest] = match
    100         sl = byte_size(spaces)
    101 
    102         %Line.Indent{
    103           level: div(sl, 4),
    104           content: more_spaces <> rest,
    105           indent: byte_size(more_spaces) + sl,
    106           line: line
    107         }
    108 
    109       match = Regex.run(~r/\A(\s*)(`{3,}|~{3,})\s*([^`\s]*)\s*\z/u, line) ->
    110         [_, leading, fence, language] = match
    111 
    112         %Line.Fence{
    113           delimiter: fence,
    114           language: _attribute_escape(language),
    115           indent: byte_size(leading),
    116           line: line
    117         }
    118 
    119       # Although no block tags I still think they should close a preceding para as do many other
    120       # implementations.
    121       match = !recursive && Regex.run(@void_tag_rgx, line) ->
    122         [_, tag] = match
    123         %Line.HtmlOneLine{tag: tag, content: line, indent: 0, line: line}
    124 
    125       match = !recursive && Regex.run(~r{\A<([-\w]+?)(?:\s.*)?>.*</\1>}, line) ->
    126         [_, tag] = match
    127         %Line.HtmlOneLine{tag: tag, content: line, indent: 0, line: line}
    128 
    129       match = !recursive && Regex.run(~r{\A<([-\w]+?)(?:\s.*)?/>.*}, line) ->
    130         [_, tag] = match
    131         %Line.HtmlOneLine{tag: tag, content: line, indent: 0, line: line}
    132 
    133       match = !recursive && Regex.run(~r/\A < ([-\w]+?) (?:\s.*)? >/x, line) ->
    134         [_, tag] = match
    135         %Line.HtmlOpenTag{tag: tag, content: line, indent: 0, line: line}
    136 
    137       match = lt_four? && !recursive && Regex.run(~r/\A<\/([-\w]+?)>/, content) ->
    138         [_, tag] = match
    139         %Line.HtmlCloseTag{tag: tag, indent: indent, line: line}
    140 
    141       match = lt_four? && Regex.run(@id_re, content) ->
    142         [_, id, url | title] = match
    143         title = if(length(title) == 0, do: "", else: hd(title))
    144         %Line.IdDef{id: id, url: url, title: title, indent: indent, line: line}
    145 
    146       match = options.footnotes && Regex.run(~r/\A\[\^([^\s\]]+)\]:\s+(.*)/, line) ->
    147         [_, id, first_line] = match
    148         %Line.FnDef{id: id, content: first_line, indent: 0, line: line}
    149 
    150       match = lt_four? && Regex.run(~r/^([-*+])\s(\s*)(.*)/, content) ->
    151         [_, bullet, spaces, text] = match
    152 
    153         %Line.ListItem{
    154           type: :ul,
    155           bullet: bullet,
    156           content: spaces <> text,
    157           indent: indent,
    158           list_indent: String.length(bullet <> spaces) + indent + 1,
    159           line: line
    160         }
    161 
    162       match = lt_four? && Regex.run(~r/^(\d{1,9}[.)])\s(\s*)(.*)/, content) ->
    163         _create_list_item(match, indent, line)
    164 
    165       match = Regex.run(~r/^ \| (?: [^|]+ \|)+ \s* $ /x, content) ->
    166         [body] = match
    167 
    168         body =
    169           body
    170           |> String.trim()
    171           |> String.trim("|")
    172 
    173         columns = _split_table_columns(body)
    174 
    175         %Line.TableLine{
    176           content: line,
    177           columns: columns,
    178           is_header: _determine_if_header(columns),
    179           indent: indent,
    180           line: line
    181         }
    182 
    183       line |> String.replace(~r/\[\[ .*? \]\]/x, "") |> String.match?(~r/\A (\s*) .* \s \| \s /x) ->
    184         columns = _split_table_columns(line)
    185 
    186         %Line.TableLine{
    187           content: line,
    188           columns: columns,
    189           is_header: _determine_if_header(columns),
    190           indent: indent,
    191           line: line
    192         }
    193 
    194       options.gfm_tables && line |> String.replace(~r/\[\[ .*? \]\]/x, "") |> String.match?(~r/\A (\s*) .* \| /x) ->
    195         columns = _split_table_columns(line)
    196 
    197         %Line.TableLine{
    198           content: line,
    199           columns: columns,
    200           is_header: _determine_if_header(columns),
    201           needs_header: true,
    202           indent: indent,
    203           line: line
    204         }
    205 
    206       match = Regex.run(~r/^(=|-)+\s*$/, line) ->
    207         [_, type] = match
    208         level = if(String.starts_with?(type, "="), do: 1, else: 2)
    209         %Line.SetextUnderlineHeading{level: level, indent: 0, line: line}
    210 
    211       match = lt_four? && Regex.run(~r<^{:(\s*[^}]+)}\s*$>, content) ->
    212         [_, ial] = match
    213         %Line.Ial{attrs: String.trim(ial), verbatim: ial, indent: indent, line: line}
    214 
    215       true ->
    216         _create_text(line, content, indent)
    217     end
    218   end
    219 
    220   defp _attribute_escape(string),
    221     do:
    222       string
    223       |> String.replace("&", "&amp;")
    224       |> String.replace("<", "&lt;")
    225 
    226   defp _create_list_item(match, indent, line)
    227 
    228   defp _create_list_item([_, bullet, spaces, text], indent, line) do
    229     sl = byte_size(spaces)
    230     sl1 = if sl > 3, do: 1, else: sl + 1
    231     sl2 = sl1 + byte_size(bullet)
    232 
    233     %Line.ListItem{
    234       type: :ol,
    235       bullet: bullet,
    236       content: spaces <> text,
    237       indent: indent,
    238       list_indent: indent + sl2,
    239       line: line
    240     }
    241   end
    242 
    243   defp _create_text(line) do
    244     {content, indent} = _count_indent(line, 0)
    245     _create_text(line, content, indent)
    246   end
    247 
    248   defp _create_text(line, "", indent),
    249     do: %Line.Blank{indent: indent, line: line}
    250 
    251   defp _create_text(line, content, indent),
    252     do: %Line.Text{content: content, indent: indent, line: line}
    253 
    254   defp _count_indent(<<space, rest::binary>>, indent) when space in [?\s, ?\t],
    255     do: _count_indent(rest, indent + 1)
    256 
    257   defp _count_indent(rest, indent),
    258     do: {rest, indent}
    259 
    260   defp _lines_with_count(lines, offset) do
    261     Enum.zip(lines, offset..(offset + Enum.count(lines)))
    262   end
    263 
    264   defp _with_lookahead([line_lnb | lines], options, recursive) do
    265     case type_of(line_lnb, options, recursive) do
    266       %Line.Fence{delimiter: delimiter, indent: 0} = fence ->
    267         stop = ~r/\A (\s*) (?: #{delimiter} ) \s* ([^`\s]*) \s* \z/xu
    268         [fence | _lookahead_until_match(lines, stop, options, recursive)]
    269 
    270       %Line.HtmlComment{complete: false} = html_comment ->
    271         [html_comment | _lookahead_until_match(lines, ~r/-->/u, options, recursive)]
    272 
    273       other ->
    274         [other | _with_lookahead(lines, options, recursive)]
    275     end
    276   end
    277 
    278   defp _with_lookahead([], _options, _recursive), do: []
    279 
    280   defp _lookahead_until_match([], _, _, _), do: []
    281 
    282   defp _lookahead_until_match([{line, lnb} | lines], regex, options, recursive) do
    283     if line =~ regex do
    284       [type_of({line, lnb}, options, recursive) | _with_lookahead(lines, options, recursive)]
    285     else
    286       [
    287         %{_create_text(line) | lnb: lnb}
    288         | _lookahead_until_match(lines, regex, options, recursive)
    289       ]
    290     end
    291   end
    292 
    293   @column_rgx ~r{\A[\s|:-]+\z}
    294   defp _determine_if_header(columns) do
    295     columns
    296     |> Enum.all?(fn col -> Regex.run(@column_rgx, col) end)
    297   end
    298 
    299   defp _split_table_columns(line) do
    300     line
    301     |> String.split(~r{(?<!\\)\|})
    302     |> Enum.map(&String.trim/1)
    303     |> Enum.map(fn col -> Regex.replace(~r{\\\|}, col, "|") end)
    304   end
    305 end
    306 
    307 #  SPDX-License-Identifier: Apache-2.0