zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

parser.ex (19812B)


      1 defmodule EarmarkParser.Parser do
      2 
      3   @moduledoc false
      4   alias EarmarkParser.{Block, Line, LineScanner, Options}
      5 
      6   import EarmarkParser.Helpers.{AttrParser, LineHelpers, ReparseHelpers}
      7 
      8   import EarmarkParser.Helpers.LookaheadHelpers,
      9     only: [opens_inline_code: 1, still_inline_code: 2]
     10 
     11   import EarmarkParser.Message, only: [add_message: 2, add_messages: 2]
     12   import EarmarkParser.Parser.FootnoteParser, only: [parse_fn_defs: 3]
     13   import EarmarkParser.Parser.ListParser, only: [parse_list: 3]
     14 
     15   @doc """
     16   Given a markdown document (as either a list of lines or
     17   a string containing newlines), return a parse tree and
     18   the context necessary to render the tree.
     19 
     20   The options are a `%EarmarkParser.Options{}` structure. See `as_html!`
     21   for more details.
     22   """
     23   def parse_markdown(lines, options)
     24 
     25   def parse_markdown(lines, options = %Options{}) when is_list(lines) do
     26     {blocks, links, footnotes, options1} = parse(lines, options, false)
     27 
     28     context =
     29       %EarmarkParser.Context{options: options1, links: links}
     30       |> EarmarkParser.Context.update_context()
     31 
     32     context = put_in(context.footnotes, footnotes)
     33     context = put_in(context.options, options1)
     34     {blocks, context}
     35   end
     36 
     37   def parse_markdown(lines, options) when is_binary(lines) do
     38     lines
     39     |> String.split(~r{\r\n?|\n})
     40     |> parse_markdown(options)
     41   end
     42 
     43   def parse(text_lines, options = %Options{}, recursive) do
     44     ["" | text_lines ++ [""]]
     45     |> LineScanner.scan_lines(options, recursive)
     46     |> parse_lines(options, recursive)
     47   end
     48 
     49   @doc false
     50   # Given a list of `Line.xxx` structs, group them into related blocks.
     51   # Then extract any id definitions, and build a map from them. Not
     52   # for external consumption.
     53 
     54   def parse_lines(lines, options, recursive) do
     55     {blocks, footnotes, options} =
     56       lines |> remove_trailing_blank_lines() |> lines_to_blocks(options, recursive)
     57 
     58 
     59     links = links_from_blocks(blocks)
     60     {blocks, links, footnotes, options}
     61   end
     62 
     63   defp lines_to_blocks(lines, options, recursive) do
     64     {blocks, footnotes, options1} = _parse(lines, [], options, recursive)
     65 
     66     {blocks |> assign_attributes_to_blocks([]), footnotes, options1}
     67   end
     68 
     69   defp _parse(input, result, options, recursive)
     70   defp _parse([], result, options, _recursive), do: {result, %{}, options}
     71 
     72   ###################
     73   # setext headings #
     74   ###################
     75 
     76   # 1 step
     77   defp _parse(
     78          [
     79            %Line.Blank{},
     80            %Line.Text{content: heading, lnb: lnb},
     81            %Line.SetextUnderlineHeading{annotation: annotation, level: level}
     82            | rest
     83          ],
     84          result,
     85          options,
     86          recursive
     87        ) do
     88     _parse(
     89       rest,
     90       [%Block.Heading{annotation: annotation, content: heading, level: level, lnb: lnb} | result],
     91       options,
     92       recursive
     93     )
     94   end
     95 
     96   # 1 step
     97   defp _parse(
     98          [
     99            %Line.Blank{},
    100            %Line.Text{content: heading, lnb: lnb},
    101            %Line.Ruler{type: "-"}
    102            | rest
    103          ],
    104          result,
    105          options,
    106          recursive
    107        ) do
    108     _parse(
    109       rest,
    110       [%Block.Heading{content: heading, level: 2, lnb: lnb} | result],
    111       options,
    112       recursive
    113     )
    114   end
    115 
    116   #################
    117   # Other heading #
    118   #################
    119 
    120   # 1 step
    121   defp _parse(
    122          [%Line.Heading{content: content, ial: ial, level: level, lnb: lnb} | rest],
    123          result,
    124          options,
    125          recursive
    126        ) do
    127     {options1, result1} =
    128       prepend_ial(
    129         options,
    130         ial,
    131         lnb,
    132         [%Block.Heading{content: content, level: level, lnb: lnb} | result]
    133       )
    134 
    135     _parse(rest, result1, options1, recursive)
    136   end
    137 
    138   #########
    139   # Ruler #
    140   #########
    141 
    142   # 1 step
    143   defp _parse([%Line.Ruler{type: type, lnb: lnb} | rest], result, options, recursive) do
    144     _parse(rest, [%Block.Ruler{type: type, lnb: lnb} | result], options, recursive)
    145   end
    146 
    147   ###############
    148   # Block Quote #
    149   ###############
    150 
    151   # split and parse
    152   defp _parse(lines = [%Line.BlockQuote{lnb: lnb} | _], result, options, recursive) do
    153     {quote_lines, rest} = Enum.split_while(lines, &blockquote_or_text?/1)
    154     lines = for line <- quote_lines, do: line.content
    155     {blocks, _, _, options1} = parse(lines, %{options | line: lnb}, true)
    156     _parse(rest, [%Block.BlockQuote{blocks: blocks, lnb: lnb} | result], options1, recursive)
    157   end
    158 
    159   #########
    160   # Table #
    161   #########
    162 
    163   # read and add verbatim
    164   defp _parse(
    165          lines = [
    166            %Line.TableLine{columns: cols1, lnb: lnb1, needs_header: false},
    167            %Line.TableLine{columns: cols2}
    168            | _rest
    169          ],
    170          result,
    171          options,
    172          recursive
    173        )
    174        when length(cols1) == length(cols2) do
    175     columns = length(cols1)
    176     {table, rest} = read_table(lines, columns, [])
    177     table1 = %{table | lnb: lnb1}
    178     _parse(rest, [table1 | result], options, recursive)
    179   end
    180 
    181   defp _parse(
    182          lines = [
    183            %Line.TableLine{columns: cols1, lnb: lnb1, needs_header: true},
    184            %Line.TableLine{columns: cols2, is_header: true}
    185            | _rest
    186          ],
    187          result,
    188          options,
    189          recursive
    190        )
    191        when length(cols1) == length(cols2) do
    192     columns = length(cols1)
    193     {table, rest} = read_table(lines, columns, [])
    194     table1 = %{table | lnb: lnb1}
    195     _parse(rest, [table1 | result], options, recursive)
    196   end
    197 
    198   #############
    199   # Paragraph #
    200   #############
    201 
    202   # split and add verbatim
    203   defp _parse(lines = [%Line.TableLine{lnb: lnb} | _], result, options, recursive) do
    204     {para_lines, rest} = Enum.split_while(lines, &text?/1)
    205     line_text = for line <- para_lines, do: line.line
    206     _parse(rest, [%Block.Para{lines: line_text, lnb: lnb + 1} | result], options, recursive)
    207   end
    208 
    209   # read and parse
    210   defp _parse(lines = [%Line.Text{lnb: lnb} | _], result, options, recursive) do
    211     {reversed_para_lines, rest, pending, annotation} = consolidate_para(lines)
    212 
    213     options1 =
    214       case pending do
    215         {nil, _} ->
    216           options
    217 
    218         {pending, lnb1} ->
    219           add_message(
    220             options,
    221             {:warning, lnb1, "Closing unclosed backquotes #{pending} at end of input"}
    222           )
    223       end
    224 
    225     line_text = for line <- reversed_para_lines |> Enum.reverse(), do: line.line
    226 
    227     if recursive == :list do
    228       _parse(rest, [%Block.Text{line: line_text, lnb: lnb} | result], options1, recursive)
    229     else
    230       _parse(
    231         rest,
    232         [%Block.Para{annotation: annotation, lines: line_text, lnb: lnb} | result],
    233         options1,
    234         recursive
    235       )
    236     end
    237   end
    238 
    239   defp _parse(
    240          [%Line.SetextUnderlineHeading{line: line, lnb: lnb, level: 2} | rest],
    241          result,
    242          options,
    243          recursive
    244        ) do
    245     _parse([%Line.Text{line: line, lnb: lnb} | rest], result, options, recursive)
    246   end
    247 
    248   #########
    249   # Lists #
    250   #########
    251   # We handle lists in two passes. In the first, we build list items,
    252   # in the second we combine adjacent items into lists. This is pass one
    253 
    254   defp _parse([%Line.ListItem{} | _] = input, result, options, recursive) do
    255     {with_prepended_lists, rest, options1} = parse_list(input, result, options)
    256     _parse([%Line.Blank{lnb: 0} | rest], with_prepended_lists, options1, recursive)
    257   end
    258 
    259   #################
    260   # Indented code #
    261   #################
    262 
    263   defp _parse(list = [%Line.Indent{lnb: lnb} | _], result, options, recursive) do
    264     {code_lines, rest} = Enum.split_while(list, &indent_or_blank?/1)
    265     code_lines = remove_trailing_blank_lines(code_lines)
    266     code = for line <- code_lines, do: properly_indent(line, 1)
    267     _parse([%Line.Blank{}|rest], [%Block.Code{lines: code, lnb: lnb} | result], options, recursive)
    268   end
    269 
    270   ###############
    271   # Fenced code #
    272   ###############
    273 
    274   defp _parse(
    275          [%Line.Fence{delimiter: delimiter, language: language, lnb: lnb} | rest],
    276          result,
    277          options,
    278          recursive
    279        ) do
    280     {code_lines, rest} =
    281       Enum.split_while(rest, fn line ->
    282         !match?(%Line.Fence{delimiter: ^delimiter, language: _}, line)
    283       end)
    284 
    285     {rest1, options1} = _check_closing_fence(rest, lnb, delimiter, options)
    286     code = for line <- code_lines, do: line.line
    287 
    288     _parse(
    289       rest1,
    290       [%Block.Code{lines: code, language: language, lnb: lnb} | result],
    291       options1,
    292       recursive
    293     )
    294   end
    295 
    296   ##############
    297   # HTML block #
    298   ##############
    299   defp _parse(
    300          [opener = %Line.HtmlOpenTag{annotation: annotation, tag: tag, lnb: lnb} | rest],
    301          result,
    302          options,
    303          recursive
    304        ) do
    305     {html_lines, rest1, unclosed, annotation} = _html_match_to_closing(opener, rest, annotation)
    306 
    307     options1 =
    308       add_messages(
    309         options,
    310         unclosed
    311         |> Enum.map(fn %{lnb: lnb1, tag: tag} ->
    312           {:warning, lnb1, "Failed to find closing <#{tag}>"}
    313         end)
    314       )
    315 
    316     html = Enum.reverse(html_lines)
    317 
    318     _parse(
    319       rest1,
    320       [%Block.Html{tag: tag, html: html, lnb: lnb, annotation: annotation} | result],
    321       options1,
    322       recursive
    323     )
    324   end
    325 
    326   ####################
    327   # HTML on one line #
    328   ####################
    329 
    330   defp _parse(
    331          [%Line.HtmlOneLine{annotation: annotation, line: line, lnb: lnb} | rest],
    332          result,
    333          options,
    334          recursive
    335        ) do
    336     _parse(
    337       rest,
    338       [%Block.HtmlOneline{annotation: annotation, html: [line], lnb: lnb} | result],
    339       options,
    340       recursive
    341     )
    342   end
    343 
    344   ################
    345   # HTML Comment #
    346   ################
    347 
    348   defp _parse(
    349          [line = %Line.HtmlComment{complete: true, lnb: lnb} | rest],
    350          result,
    351          options,
    352          recursive
    353        ) do
    354     _parse(rest, [%Block.HtmlComment{lines: [line.line], lnb: lnb} | result], options, recursive)
    355   end
    356 
    357   defp _parse(
    358          lines = [%Line.HtmlComment{complete: false, lnb: lnb} | _],
    359          result,
    360          options,
    361          recursive
    362        ) do
    363     {html_lines, rest} =
    364       Enum.split_while(lines, fn line ->
    365         !(line.line =~ ~r/-->/)
    366       end)
    367 
    368     {html_lines, rest} =
    369       if rest == [] do
    370         {html_lines, rest}
    371       else
    372         {html_lines ++ [hd(rest)], tl(rest)}
    373       end
    374 
    375     html = for line <- html_lines, do: line.line
    376     _parse(rest, [%Block.HtmlComment{lines: html, lnb: lnb} | result], options, recursive)
    377   end
    378 
    379   #################
    380   # ID definition #
    381   #################
    382 
    383   defp _parse([defn = %Line.IdDef{lnb: lnb} | rest], result, options, recursive) do
    384     _parse(
    385       rest,
    386       [%Block.IdDef{id: defn.id, url: defn.url, title: defn.title, lnb: lnb} | result],
    387       options,
    388       recursive
    389     )
    390   end
    391 
    392   #######################
    393   # Footnote Definition #
    394   #######################
    395 
    396   # Starting from 1.5.0 Footnote Definitions are always at the end of the document (GFM) meaning that the
    397   # `_parse` iteration can now end and we will trigger `_parse_fn_defs`
    398   # this has the advantage that we can make the assumption that the top of the `result`
    399   # list contains a `Block.FnList` element
    400   defp _parse([%Line.FnDef{} | _] = input, result, options, _recursive) do
    401     parse_fn_defs(input, result, options)
    402   end
    403 
    404   ####################
    405   # IAL (attributes) #
    406   ####################
    407 
    408   defp _parse(
    409          [%Line.Ial{attrs: attrs, lnb: lnb, verbatim: verbatim} | rest],
    410          result,
    411          options,
    412          recursive
    413        ) do
    414     {options1, attributes} = parse_attrs(options, attrs, lnb)
    415 
    416     _parse(
    417       rest,
    418       [%Block.Ial{attrs: attributes, content: attrs, lnb: lnb, verbatim: verbatim} | result],
    419       options1,
    420       recursive
    421     )
    422   end
    423 
    424   ###############
    425   # Blank Lines #
    426   ###############
    427   # We've reached the point where empty lines are no longer significant
    428 
    429   defp _parse([%Line.Blank{} | rest], result, options, recursive) do
    430     _parse(rest, result, options, recursive)
    431   end
    432 
    433   ##############################################################
    434   # Anything else... we warn, then treat it as if it were text #
    435   ##############################################################
    436 
    437   defp _parse([anything = %{lnb: lnb} | rest], result, options, recursive) do
    438     _parse(
    439       [%Line.Text{content: anything.line, lnb: lnb} | rest],
    440       result,
    441       add_message(options, {:warning, anything.lnb, "Unexpected line #{anything.line}"}),
    442       recursive
    443     )
    444   end
    445 
    446 
    447   #######################################################
    448   # Assign attributes that follow a block to that block #
    449   #######################################################
    450 
    451   defp assign_attributes_to_blocks([], result), do: result
    452 
    453   defp assign_attributes_to_blocks([%Block.Ial{attrs: attrs}, block | rest], result) do
    454     assign_attributes_to_blocks(rest, [%{block | attrs: attrs} | result])
    455   end
    456 
    457   defp assign_attributes_to_blocks([block | rest], result) do
    458     assign_attributes_to_blocks(rest, [block | result])
    459   end
    460 
    461   defp _check_closing_fence(rest, lnb, delimiter, options)
    462   defp _check_closing_fence([], lnb, delimiter, options) do
    463     {[], add_message(options, {:error, lnb, "Fenced Code Block opened with #{delimiter} not closed at end of input"})}
    464   end
    465   defp _check_closing_fence([_|rest], _lnb, _delimiter, options) do
    466     {rest, options}
    467   end
    468 
    469   ############################################################
    470   # Consolidate multiline inline code blocks into an element #
    471   ############################################################
    472   @not_pending {nil, 0}
    473   # ([#{},...]) -> {[#{}],[#{}],{'nil' | binary(),number()}}
    474   # @spec consolidate_para( ts ) :: { ts, ts, {nil | String.t, number} }
    475   defp consolidate_para(lines), do: _consolidate_para(lines, [], @not_pending, nil)
    476 
    477   defp _consolidate_para([], result, pending, annotation) do
    478     {result, [], pending, annotation}
    479   end
    480 
    481   defp _consolidate_para([line | rest] = lines, result, pending, annotation) do
    482     case _inline_or_text?(line, pending) do
    483       %{pending: still_pending, continue: true} ->
    484         _consolidate_para(rest, [line | result], still_pending, annotation || line.annotation)
    485 
    486       _ ->
    487         {result, lines, @not_pending, annotation}
    488     end
    489   end
    490 
    491   ##################################################
    492   # Read in a table (consecutive TableLines with
    493   # the same number of columns)
    494 
    495   defp read_table(lines, col_count, rows)
    496 
    497   defp read_table(
    498          [%Line.TableLine{columns: cols} | rest],
    499          col_count,
    500          rows
    501        )
    502        when length(cols) == col_count do
    503     read_table(rest, col_count, [cols | rows])
    504   end
    505 
    506   defp read_table(rest, col_count, rows) do
    507     rows = Enum.reverse(rows)
    508     table = Block.Table.new_for_columns(col_count)
    509 
    510     table =
    511       case look_for_alignments(rows) do
    512         nil -> %Block.Table{table | rows: rows}
    513         aligns -> %Block.Table{table | alignments: aligns, header: hd(rows), rows: tl(tl(rows))}
    514       end
    515 
    516     {table, [%Line.Blank{lnb: 0} | rest]}
    517   end
    518 
    519   defp look_for_alignments([_first, second | _rest]) do
    520     if Enum.all?(second, fn row -> row =~ ~r{^:?-+:?$} end) do
    521       second
    522       |> Enum.map(fn row -> Regex.replace(~r/-+/, row, "-") end)
    523       |> Enum.map(fn row ->
    524         case row do
    525           ":-:" -> :center
    526           ":-" -> :left
    527           "-" -> :left
    528           "-:" -> :right
    529         end
    530       end)
    531     else
    532       nil
    533     end
    534   end
    535 
    536   #####################################################
    537   # Traverse the block list and build a list of links #
    538   #####################################################
    539 
    540   defp links_from_blocks(blocks) do
    541     visit(blocks, Map.new(), &link_extractor/2)
    542   end
    543 
    544   defp link_extractor(item = %Block.IdDef{id: id}, result) do
    545     Map.put(result, String.downcase(id), item)
    546   end
    547 
    548   defp link_extractor(_, result), do: result
    549 
    550   ##################################
    551   # Visitor pattern for each block #
    552   ##################################
    553 
    554   defp visit([], result, _func), do: result
    555 
    556   # Structural node BlockQuote -> descend
    557   defp visit([item = %Block.BlockQuote{blocks: blocks} | rest], result, func) do
    558     result = func.(item, result)
    559     result = visit(blocks, result, func)
    560     visit(rest, result, func)
    561   end
    562 
    563   # Structural node List -> descend
    564   defp visit([item = %Block.List{blocks: blocks} | rest], result, func) do
    565     result = func.(item, result)
    566     result = visit(blocks, result, func)
    567     visit(rest, result, func)
    568   end
    569 
    570   # Structural node ListItem -> descend
    571   defp visit([item = %Block.ListItem{blocks: blocks} | rest], result, func) do
    572     result = func.(item, result)
    573     result = visit(blocks, result, func)
    574     visit(rest, result, func)
    575   end
    576 
    577   # Leaf, leaf it alone
    578   defp visit([item | rest], result, func) do
    579     result = func.(item, result)
    580     visit(rest, result, func)
    581   end
    582 
    583   ###################################################################
    584   # Consume HTML, taking care of nesting. Assumes one tag per line. #
    585   ###################################################################
    586 
    587   defp _html_match_to_closing(opener, rest, annotation),
    588     do: _find_closing_tags([opener], rest, [opener.line], [], annotation)
    589 
    590   defp _find_closing_tags(needed, input, html_lines, text_lines, annotation)
    591 
    592   # No more open tags, happy case
    593   defp _find_closing_tags([], rest, html_lines, [], annotation),
    594     do: {html_lines, rest, [], annotation}
    595 
    596   # run out of input, unhappy case
    597   defp _find_closing_tags(needed, [], html_lines, text_lines, annotation),
    598     do: {_add_text_lines(html_lines, text_lines), [], needed, annotation}
    599 
    600   # still more lines, still needed closing
    601   defp _find_closing_tags(
    602          needed = [needed_hd | needed_tl],
    603          [rest_hd | rest_tl],
    604          html_lines,
    605          text_lines,
    606          annotation
    607        ) do
    608     cond do
    609       _closes_tag?(rest_hd, needed_hd) ->
    610         _find_closing_tags(
    611           needed_tl,
    612           rest_tl,
    613           [rest_hd.line | _add_text_lines(html_lines, text_lines)],
    614           [],
    615           _override_annotation(annotation, rest_hd)
    616         )
    617 
    618       _opens_tag?(rest_hd) ->
    619         _find_closing_tags(
    620           [rest_hd | needed],
    621           rest_tl,
    622           [rest_hd.line | _add_text_lines(html_lines, text_lines)],
    623           [],
    624           annotation
    625         )
    626 
    627       true ->
    628         _find_closing_tags(needed, rest_tl, html_lines, [rest_hd.line | text_lines], annotation)
    629     end
    630   end
    631 
    632   defp _add_text_lines(html_lines, []),
    633     do: html_lines
    634 
    635   defp _add_text_lines(html_lines, text_lines),
    636     do: [text_lines |> Enum.reverse() |> Enum.join("\n") | html_lines]
    637 
    638   ###########
    639   # Helpers #
    640   ###########
    641 
    642   defp _closes_tag?(%Line.HtmlCloseTag{tag: ctag}, %Line.HtmlOpenTag{tag: otag}) do
    643     ctag == otag
    644   end
    645 
    646   defp _closes_tag?(_, _), do: false
    647 
    648   defp _opens_tag?(%Line.HtmlOpenTag{}), do: true
    649   defp _opens_tag?(_), do: false
    650 
    651   defp _inline_or_text?(line, pending)
    652 
    653   defp _inline_or_text?(line = %Line.Text{}, @not_pending) do
    654     pending = opens_inline_code(line)
    655     %{pending: pending, continue: true}
    656   end
    657 
    658   defp _inline_or_text?(line = %Line.TableLine{}, @not_pending) do
    659     pending = opens_inline_code(line)
    660     %{pending: pending, continue: true}
    661   end
    662 
    663   defp _inline_or_text?(_line, @not_pending), do: %{pending: @not_pending, continue: false}
    664 
    665   defp _inline_or_text?(line, pending) do
    666     pending = still_inline_code(line, pending)
    667     %{pending: pending, continue: true}
    668   end
    669 
    670   defp _override_annotation(annotation, line), do: annotation || line.annotation
    671 
    672   defp remove_trailing_blank_lines(lines) do
    673     lines
    674     |> Enum.reverse()
    675     |> Enum.drop_while(&blank?/1)
    676     |> Enum.reverse()
    677   end
    678 
    679   def prepend_ial(context, maybeatts, lnb, result)
    680   def prepend_ial(context, nil, _lnb, result), do: {context, result}
    681 
    682   def prepend_ial(context, ial, lnb, result) do
    683     {context1, attributes} = parse_attrs(context, ial, lnb)
    684     {context1, [%Block.Ial{attrs: attributes, content: ial, lnb: lnb, verbatim: ial} | result]}
    685   end
    686 end
    687 
    688 # SPDX-License-Identifier: Apache-2.0