parser.ex (19812B)
defmodule EarmarkParser.Parser do
  @moduledoc false
  alias EarmarkParser.{Block, Line, LineScanner, Options}

  import EarmarkParser.Helpers.{AttrParser, LineHelpers, ReparseHelpers}

  import EarmarkParser.Helpers.LookaheadHelpers,
    only: [opens_inline_code: 1, still_inline_code: 2]

  import EarmarkParser.Message, only: [add_message: 2, add_messages: 2]
  import EarmarkParser.Parser.FootnoteParser, only: [parse_fn_defs: 3]
  import EarmarkParser.Parser.ListParser, only: [parse_list: 3]

  @doc """
  Given a markdown document (as either a list of lines or
  a string containing newlines), return a parse tree and
  the context necessary to render the tree.

  The options are a `%EarmarkParser.Options{}` structure. See `as_html!`
  for more details.

  Returns `{blocks, context}`, where `context` is an `%EarmarkParser.Context{}`
  carrying the collected links, the footnotes and the (possibly updated) options.
  """
  def parse_markdown(lines, options)

  def parse_markdown(lines, options = %Options{}) when is_list(lines) do
    {blocks, links, footnotes, options1} = parse(lines, options, false)

    context =
      %EarmarkParser.Context{options: options1, links: links}
      |> EarmarkParser.Context.update_context()

    # `options` is written again after `update_context/1` so that the context
    # ends up holding exactly the options returned by the parser.
    {blocks, %{context | footnotes: footnotes, options: options1}}
  end

  def parse_markdown(lines, options) when is_binary(lines) do
    # Accept "\r\n", a lone "\r" and "\n" as line separators.
    lines
    |> String.split(~r{\r\n?|\n})
    |> parse_markdown(options)
  end

  # Scans `text_lines` (surrounded by one empty line on each side) into `Line.*`
  # structs and parses them. Returns `{blocks, links, footnotes, options}`.
  # `recursive` is handed through to the scanner and to `_parse/4`; the values
  # used in this module are `false`, `true` (block quotes) and `:list`.
  def parse(text_lines, options = %Options{}, recursive) do
    ["" | text_lines ++ [""]]
    |> LineScanner.scan_lines(options, recursive)
    |> parse_lines(options, recursive)
  end

  @doc false
  # Given a list of `Line.xxx` structs, group them into related blocks.
  # Then extract any id definitions, and build a map from them. Not
  # for external consumption.

  def parse_lines(lines, options, recursive) do
    {blocks, footnotes, options1} =
      lines
      |> remove_trailing_blank_lines()
      |> lines_to_blocks(options, recursive)

    {blocks, links_from_blocks(blocks), footnotes, options1}
  end

  # `_parse/4` accumulates blocks in reverse order; `assign_attributes_to_blocks/2`
  # restores document order while attaching IAL attributes.
  defp lines_to_blocks(lines, options, recursive) do
    {reversed_blocks, footnotes, options1} = _parse(lines, [], options, recursive)

    {assign_attributes_to_blocks(reversed_blocks, []), footnotes, options1}
  end

  # Main block parser. Consumes `input` (a list of `Line.*` structs) from the
  # front and prepends the recognised blocks to `result`.
  # Returns `{reversed_blocks, footnotes, options}`.
  #
  # The order of the clauses below is significant: the first match wins.
  defp _parse(input, result, options, recursive)
  defp _parse([], result, options, _recursive), do: {result, %{}, options}

  ###################
  # setext headings #
  ###################

  # 1 step
  defp _parse(
         [
           %Line.Blank{},
           %Line.Text{content: heading, lnb: lnb},
           %Line.SetextUnderlineHeading{annotation: annotation, level: level}
           | rest
         ],
         result,
         options,
         recursive
       ) do
    _parse(
      rest,
      [%Block.Heading{annotation: annotation, content: heading, level: level, lnb: lnb} | result],
      options,
      recursive
    )
  end

  # 1 step
  # A text line underlined by a "-" ruler is a level 2 heading.
  defp _parse(
         [
           %Line.Blank{},
           %Line.Text{content: heading, lnb: lnb},
           %Line.Ruler{type: "-"}
           | rest
         ],
         result,
         options,
         recursive
       ) do
    _parse(
      rest,
      [%Block.Heading{content: heading, level: 2, lnb: lnb} | result],
      options,
      recursive
    )
  end

  #################
  # Other heading #
  #################

  # 1 step
  defp _parse(
         [%Line.Heading{content: content, ial: ial, level: level, lnb: lnb} | rest],
         result,
         options,
         recursive
       ) do
    # An IAL on the heading line becomes a `Block.Ial` pushed on top of the heading.
    {options1, result1} =
      prepend_ial(
        options,
        ial,
        lnb,
        [%Block.Heading{content: content, level: level, lnb: lnb} | result]
      )

    _parse(rest, result1, options1, recursive)
  end

  #########
  # Ruler #
  #########

  # 1 step
  defp _parse([%Line.Ruler{type: type, lnb: lnb} | rest], result, options, recursive) do
    _parse(rest, [%Block.Ruler{type: type, lnb: lnb} | result], options, recursive)
  end

  ###############
  # Block Quote #
  ###############

  # split and parse
  defp _parse(lines = [%Line.BlockQuote{lnb: lnb} | _], result, options, recursive) do
    {quote_lines, rest} = Enum.split_while(lines, &blockquote_or_text?/1)
    quoted_text = Enum.map(quote_lines, & &1.content)
    # The quoted content is parsed as a document of its own, starting at `lnb`.
    {blocks, _, _, options1} = parse(quoted_text, %{options | line: lnb}, true)
    _parse(rest, [%Block.BlockQuote{blocks: blocks, lnb: lnb} | result], options1, recursive)
  end

  #########
  # Table #
  #########

  # read and add verbatim
  defp _parse(
         lines = [
           %Line.TableLine{columns: cols1, lnb: lnb1, needs_header: false},
           %Line.TableLine{columns: cols2}
           | _rest
         ],
         result,
         options,
         recursive
       )
       when length(cols1) == length(cols2) do
    {table, rest} = _table_block(lines, cols1, lnb1)
    _parse(rest, [table | result], options, recursive)
  end

  # A first line that needs a header only starts a table when it is followed
  # by a header line.
  defp _parse(
         lines = [
           %Line.TableLine{columns: cols1, lnb: lnb1, needs_header: true},
           %Line.TableLine{columns: cols2, is_header: true}
           | _rest
         ],
         result,
         options,
         recursive
       )
       when length(cols1) == length(cols2) do
    {table, rest} = _table_block(lines, cols1, lnb1)
    _parse(rest, [table | result], options, recursive)
  end

  #############
  # Paragraph #
  #############

  # split and add verbatim
  # Table lines that did not form a table above are emitted as a paragraph.
  defp _parse(lines = [%Line.TableLine{lnb: lnb} | _], result, options, recursive) do
    {para_lines, rest} = Enum.split_while(lines, &text?/1)
    line_text = Enum.map(para_lines, & &1.line)
    _parse(rest, [%Block.Para{lines: line_text, lnb: lnb + 1} | result], options, recursive)
  end

  # read and parse
  defp _parse(lines = [%Line.Text{lnb: lnb} | _], result, options, recursive) do
    {reversed_para_lines, rest, pending, annotation} = consolidate_para(lines)

    # A non-nil pending delimiter means inline code was still open when the
    # input ran out.
    options1 =
      case pending do
        {nil, _} ->
          options

        {pending, lnb1} ->
          add_message(
            options,
            {:warning, lnb1, "Closing unclosed backquotes #{pending} at end of input"}
          )
      end

    line_text =
      reversed_para_lines
      |> Enum.reverse()
      |> Enum.map(& &1.line)

    # Inside a list, text becomes a `Block.Text`, otherwise a paragraph.
    block =
      if recursive == :list do
        %Block.Text{line: line_text, lnb: lnb}
      else
        %Block.Para{annotation: annotation, lines: line_text, lnb: lnb}
      end

    _parse(rest, [block | result], options1, recursive)
  end

  # A level 2 setext underline that did not follow a text line is reparsed as text.
  defp _parse(
         [%Line.SetextUnderlineHeading{line: line, lnb: lnb, level: 2} | rest],
         result,
         options,
         recursive
       ) do
    _parse([%Line.Text{line: line, lnb: lnb} | rest], result, options, recursive)
  end

  #########
  # Lists #
  #########
  # We handle lists in two passes. In the first, we build list items,
  # in the second we combine adjacent items into lists. This is pass one

  defp _parse([%Line.ListItem{} | _] = input, result, options, recursive) do
    {with_prepended_lists, rest, options1} = parse_list(input, result, options)
    # A blank line is put in front of the remaining input before continuing.
    _parse([%Line.Blank{lnb: 0} | rest], with_prepended_lists, options1, recursive)
  end

  #################
  # Indented code #
  #################

  defp _parse(list = [%Line.Indent{lnb: lnb} | _], result, options, recursive) do
    {indented, rest} = Enum.split_while(list, &indent_or_blank?/1)

    code =
      indented
      |> remove_trailing_blank_lines()
      |> Enum.map(&properly_indent(&1, 1))

    # The trailing blank lines were swallowed by the split above; put one
    # blank line back in front of the remaining input.
    _parse(
      [%Line.Blank{} | rest],
      [%Block.Code{lines: code, lnb: lnb} | result],
      options,
      recursive
    )
  end

  ###############
  # Fenced code #
  ###############

  defp _parse(
         [%Line.Fence{delimiter: delimiter, language: language, lnb: lnb} | rest],
         result,
         options,
         recursive
       ) do
    # Everything up to the next fence with the same delimiter is code.
    {code_lines, from_closing_fence} =
      Enum.split_while(rest, fn line ->
        !match?(%Line.Fence{delimiter: ^delimiter, language: _}, line)
      end)

    {rest1, options1} = _check_closing_fence(from_closing_fence, lnb, delimiter, options)
    code = Enum.map(code_lines, & &1.line)

    _parse(
      rest1,
      [%Block.Code{lines: code, language: language, lnb: lnb} | result],
      options1,
      recursive
    )
  end

  ##############
  # HTML block #
  ##############
  defp _parse(
         [opener = %Line.HtmlOpenTag{annotation: annotation, tag: tag, lnb: lnb} | rest],
         result,
         options,
         recursive
       ) do
    {html_lines, rest1, unclosed, annotation} = _html_match_to_closing(opener, rest, annotation)

    # One warning per opening tag that never got closed.
    warnings =
      Enum.map(unclosed, fn %{lnb: open_lnb, tag: open_tag} ->
        {:warning, open_lnb, "Failed to find closing <#{open_tag}>"}
      end)

    options1 = add_messages(options, warnings)
    html = Enum.reverse(html_lines)

    _parse(
      rest1,
      [%Block.Html{tag: tag, html: html, lnb: lnb, annotation: annotation} | result],
      options1,
      recursive
    )
  end

  ####################
  # HTML on one line #
  ####################

  defp _parse(
         [%Line.HtmlOneLine{annotation: annotation, line: line, lnb: lnb} | rest],
         result,
         options,
         recursive
       ) do
    _parse(
      rest,
      [%Block.HtmlOneline{annotation: annotation, html: [line], lnb: lnb} | result],
      options,
      recursive
    )
  end

  ################
  # HTML Comment #
  ################

  defp _parse(
         [line = %Line.HtmlComment{complete: true, lnb: lnb} | rest],
         result,
         options,
         recursive
       ) do
    _parse(rest, [%Block.HtmlComment{lines: [line.line], lnb: lnb} | result], options, recursive)
  end

  defp _parse(
         lines = [%Line.HtmlComment{complete: false, lnb: lnb} | _],
         result,
         options,
         recursive
       ) do
    {before_close, from_close} =
      Enum.split_while(lines, fn line -> not String.contains?(line.line, "-->") end)

    # The line holding "-->" (if there is one) still belongs to the comment.
    {comment_lines, rest} =
      case from_close do
        [] -> {before_close, []}
        [closing | tail] -> {before_close ++ [closing], tail}
      end

    html = Enum.map(comment_lines, & &1.line)
    _parse(rest, [%Block.HtmlComment{lines: html, lnb: lnb} | result], options, recursive)
  end

  #################
  # ID definition #
  #################

  defp _parse([defn = %Line.IdDef{lnb: lnb} | rest], result, options, recursive) do
    _parse(
      rest,
      [%Block.IdDef{id: defn.id, url: defn.url, title: defn.title, lnb: lnb} | result],
      options,
      recursive
    )
  end

  #######################
  # Footnote Definition #
  #######################

  # Starting from 1.5.0 Footnote Definitions are always at the end of the document (GFM) meaning that the
  # `_parse` iteration can now end and we will trigger `parse_fn_defs`
  # this has the advantage that we can make the assumption that the top of the `result`
  # list contains a `Block.FnList` element
  defp _parse([%Line.FnDef{} | _] = input, result, options, _recursive) do
    parse_fn_defs(input, result, options)
  end

  ####################
  # IAL (attributes) #
  ####################

  defp _parse(
         [%Line.Ial{attrs: attrs, lnb: lnb, verbatim: verbatim} | rest],
         result,
         options,
         recursive
       ) do
    {options1, attributes} = parse_attrs(options, attrs, lnb)

    _parse(
      rest,
      [%Block.Ial{attrs: attributes, content: attrs, lnb: lnb, verbatim: verbatim} | result],
      options1,
      recursive
    )
  end

  ###############
  # Blank Lines #
  ###############
  # We've reached the point where empty lines are no longer significant

  defp _parse([%Line.Blank{} | rest], result, options, recursive) do
    _parse(rest, result, options, recursive)
  end

  ##############################################################
  # Anything else... we warn, then treat it as if it were text #
  ##############################################################

  # NOTE(review): the replacement `Line.Text` only gets `content:` and `lnb:`,
  # while the paragraph clause above collects `line.line`. The text of the
  # unexpected line therefore does not seem to reach the resulting paragraph --
  # confirm this is intended before changing it.
  defp _parse([anything = %{lnb: lnb} | rest], result, options, recursive) do
    _parse(
      [%Line.Text{content: anything.line, lnb: lnb} | rest],
      result,
      add_message(options, {:warning, anything.lnb, "Unexpected line #{anything.line}"}),
      recursive
    )
  end

  # Reads the table starting at the head of `lines` and stamps it with the line
  # number of its first line. Returns `{table, rest}`.
  defp _table_block(lines, first_columns, lnb) do
    {table, rest} = read_table(lines, length(first_columns), [])
    {%{table | lnb: lnb}, rest}
  end

  #######################################################
  # Assign attributes that follow a block to that block #
  #######################################################

  # The input is in reverse document order, so an IAL shows up right *before*
  # the block it belongs to. The result is in document order.
  defp assign_attributes_to_blocks([], result), do: result

  defp assign_attributes_to_blocks([%Block.Ial{attrs: attrs}, block | rest], result) do
    assign_attributes_to_blocks(rest, [%{block | attrs: attrs} | result])
  end

  defp assign_attributes_to_blocks([block | rest], result) do
    assign_attributes_to_blocks(rest, [block | result])
  end

  # Drops the closing fence from the remaining input, or records an error when
  # the input ended before the fence was closed. Returns `{rest, options}`.
  defp _check_closing_fence(rest, lnb, delimiter, options)

  defp _check_closing_fence([], lnb, delimiter, options) do
    message =
      {:error, lnb, "Fenced Code Block opened with #{delimiter} not closed at end of input"}

    {[], add_message(options, message)}
  end

  defp _check_closing_fence([_closing_fence | rest], _lnb, _delimiter, options) do
    {rest, options}
  end

  ############################################################
  # Consolidate multiline inline code blocks into an element #
  ############################################################
  @not_pending {nil, 0}
  # ([#{},...]) -> {[#{}],[#{}],{'nil' | binary(),number()}}
  # @spec consolidate_para( ts ) :: { ts, ts, {nil | String.t, number} }
  #
  # Returns `{reversed_para_lines, rest, pending, annotation}`; `annotation` is
  # the first non-nil annotation found on the consumed lines.
  defp consolidate_para(lines), do: _consolidate_para(lines, [], @not_pending, nil)

  defp _consolidate_para([], result, pending, annotation) do
    {result, [], pending, annotation}
  end

  defp _consolidate_para([line | rest] = lines, result, pending, annotation) do
    case _inline_or_text?(line, pending) do
      %{pending: still_pending, continue: true} ->
        _consolidate_para(rest, [line | result], still_pending, annotation || line.annotation)

      _ ->
        # `line` does not belong to the paragraph, hand it back untouched.
        {result, lines, @not_pending, annotation}
    end
  end

  ##################################################
  # Read in a table (consecutive TableLines with
  # the same number of columns)

  defp read_table(lines, col_count, rows)

  defp read_table(
         [%Line.TableLine{columns: cols} | rest],
         col_count,
         rows
       )
       when length(cols) == col_count do
    read_table(rest, col_count, [cols | rows])
  end

  defp read_table(rest, col_count, rows) do
    rows = Enum.reverse(rows)
    table = Block.Table.new_for_columns(col_count)

    table =
      case look_for_alignments(rows) do
        nil ->
          %Block.Table{table | rows: rows}

        aligns ->
          # First row is the header, second row the alignment line.
          %Block.Table{table | alignments: aligns, header: hd(rows), rows: tl(tl(rows))}
      end

    {table, [%Line.Blank{lnb: 0} | rest]}
  end

  # Returns the list of column alignments when the second row is an alignment
  # line (every cell matches `:?-+:?`), `nil` otherwise.
  defp look_for_alignments([_header, separator | _rest]) do
    if Enum.all?(separator, fn cell -> cell =~ ~r{^:?-+:?$} end) do
      Enum.map(separator, &_alignment_for/1)
    end
  end

  # Fewer than two rows: there cannot be an alignment line.
  defp look_for_alignments(_rows), do: nil

  defp _alignment_for(cell) do
    # Collapse the run of dashes so that only the colons tell the cells apart.
    case Regex.replace(~r/-+/, cell, "-") do
      ":-:" -> :center
      ":-" -> :left
      "-" -> :left
      "-:" -> :right
    end
  end

  #####################################################
  # Traverse the block list and build a list of links #
  #####################################################

  defp links_from_blocks(blocks) do
    visit(blocks, Map.new(), &link_extractor/2)
  end

  # Link ids are stored downcased.
  defp link_extractor(item = %Block.IdDef{id: id}, result) do
    Map.put(result, String.downcase(id), item)
  end

  defp link_extractor(_, result), do: result

  ##################################
  # Visitor pattern for each block #
  ##################################

  defp visit([], result, _func), do: result

  # Structural nodes (BlockQuote, List, ListItem) -> visit the node, then descend
  defp visit([item = %struct{blocks: blocks} | rest], result, func)
       when struct in [Block.BlockQuote, Block.List, Block.ListItem] do
    result = func.(item, result)
    result = visit(blocks, result, func)
    visit(rest, result, func)
  end

  # Leaf, leaf it alone
  defp visit([item | rest], result, func) do
    visit(rest, func.(item, result), func)
  end

  ###################################################################
  # Consume HTML, taking care of nesting. Assumes one tag per line. #
  ###################################################################

  # Returns `{reversed_html_lines, rest, unclosed_openers, annotation}`.
  defp _html_match_to_closing(opener, rest, annotation),
    do: _find_closing_tags([opener], rest, [opener.line], [], annotation)

  defp _find_closing_tags(needed, input, html_lines, text_lines, annotation)

  # No more open tags, happy case
  defp _find_closing_tags([], rest, html_lines, [], annotation),
    do: {html_lines, rest, [], annotation}

  # run out of input, unhappy case
  defp _find_closing_tags(needed, [], html_lines, text_lines, annotation),
    do: {_add_text_lines(html_lines, text_lines), [], needed, annotation}

  # still more lines, still needed closing
  defp _find_closing_tags(
         needed = [needed_hd | needed_tl],
         [rest_hd | rest_tl],
         html_lines,
         text_lines,
         annotation
       ) do
    cond do
      _closes_tag?(rest_hd, needed_hd) ->
        _find_closing_tags(
          needed_tl,
          rest_tl,
          [rest_hd.line | _add_text_lines(html_lines, text_lines)],
          [],
          _override_annotation(annotation, rest_hd)
        )

      _opens_tag?(rest_hd) ->
        _find_closing_tags(
          [rest_hd | needed],
          rest_tl,
          [rest_hd.line | _add_text_lines(html_lines, text_lines)],
          [],
          annotation
        )

      true ->
        # Plain content: buffer it until the next tag line (or the end of input).
        _find_closing_tags(needed, rest_tl, html_lines, [rest_hd.line | text_lines], annotation)
    end
  end

  # Pushes the buffered text lines, joined by newlines, as one entry.
  defp _add_text_lines(html_lines, []),
    do: html_lines

  defp _add_text_lines(html_lines, text_lines),
    do: [text_lines |> Enum.reverse() |> Enum.join("\n") | html_lines]

  ###########
  # Helpers #
  ###########

  defp _closes_tag?(%Line.HtmlCloseTag{tag: ctag}, %Line.HtmlOpenTag{tag: otag}) do
    ctag == otag
  end

  defp _closes_tag?(_, _), do: false

  defp _opens_tag?(%Line.HtmlOpenTag{}), do: true
  defp _opens_tag?(_), do: false

  # Decides whether `line` continues the current paragraph. Returns
  # `%{pending: pending, continue: boolean}`.
  defp _inline_or_text?(line, pending)

  # No inline code open: only text and table lines continue the paragraph.
  defp _inline_or_text?(line = %struct{}, @not_pending)
       when struct in [Line.Text, Line.TableLine] do
    %{pending: opens_inline_code(line), continue: true}
  end

  defp _inline_or_text?(_line, @not_pending), do: %{pending: @not_pending, continue: false}

  # Inline code still open: any line continues the paragraph.
  defp _inline_or_text?(line, pending) do
    %{pending: still_inline_code(line, pending), continue: true}
  end

  # An annotation that is already set wins over the one on `line`.
  defp _override_annotation(annotation, line), do: annotation || line.annotation

  defp remove_trailing_blank_lines(lines) do
    lines
    |> Enum.reverse()
    |> Enum.drop_while(&blank?/1)
    |> Enum.reverse()
  end

  # Parses `ial` and pushes the resulting `Block.Ial` on `result`; a `nil` IAL
  # leaves everything untouched. Returns `{context, result}`.
  def prepend_ial(context, maybeatts, lnb, result)
  def prepend_ial(context, nil, _lnb, result), do: {context, result}

  def prepend_ial(context, ial, lnb, result) do
    {context1, attributes} = parse_attrs(context, ial, lnb)
    {context1, [%Block.Ial{attrs: attributes, content: ial, lnb: lnb, verbatim: ial} | result]}
  end
end

# SPDX-License-Identifier: Apache-2.0