html_parser.ex (2791B)
defmodule EarmarkParser.Helpers.HtmlParser do
  @moduledoc false

  import EarmarkParser.Helpers.StringHelpers, only: [behead: 2]
  import EarmarkParser.LineScanner, only: [void_tag?: 1]

  # Parses the lines of an HTML block. The first line must start with the
  # opening tag; the remaining lines are the block's content.
  #
  # The first element of the returned list is the node built for the tag,
  # `{tag, atts, content_lines, %{verbatim: true}}`, where `atts` is a list of
  # `{name, value}` string pairs in source order.
  #
  # * For a regular tag, text following the opening tag on the same line
  #   becomes the first content line.
  # * For a self-closed or void tag, such text is returned after the node,
  #   wrapped in its own list.
  #
  # Raises `FunctionClauseError` on an empty list and `CaseClauseError` when
  # the first line does not start with an opening tag.
  @spec parse_html(nonempty_list(String.t())) :: list()
  def parse_html(lines)

  def parse_html([tag_line | rest]) do
    case _parse_tag(tag_line) do
      {:ok, tag, ""} -> [_parse_rest(rest, tag, [])]
      {:ok, tag, suffix} -> [_parse_rest(rest, tag, [suffix])]
      {:ext, tag, ""} -> [_parse_rest(rest, tag, [])]
      {:ext, tag, suffix} -> [_parse_rest(rest, tag, []), [suffix]]
    end
  end

  # Parse One Tag
  # -------------

  # Both patterns use the `x` flag, so whitespace inside them is insignificant.
  #
  # name = "value" or name = 'value'; `\2` requires the same closing quote.
  @quoted_attr ~r{\A ([-\w]+) \s* = \s* (["']) (.*?) \2 \s*}x
  # name = value, or a bare name. The value must stop in front of `>`;
  # otherwise `<div class=foo>` would swallow the end of the tag and
  # `_parse_tag_tail/3` would find no `>` to match.
  @unquoted_attr ~r{\A ([-\w]+) (?: \s* = \s* ([^&\s>]*))? \s*}x

  # Consumes attributes from the front of `string` one at a time, collecting
  # them in reverse order in `atts`. Once neither attribute pattern matches,
  # the remainder is handed to `_parse_tag_tail/3`.
  defp _parse_atts(string, tag, atts) do
    case Regex.run(@quoted_attr, string) do
      [all, name, _delim, value] ->
        _parse_atts(behead(string, all), tag, [{name, value} | atts])

      _ ->
        case Regex.run(@unquoted_attr, string) do
          [all, name, value] ->
            _parse_atts(behead(string, all), tag, [{name, value} | atts])

          # A bare attribute (e.g. `disabled`) gets its own name as value.
          [all, name] ->
            _parse_atts(behead(string, all), tag, [{name, name} | atts])

          _ ->
            _parse_tag_tail(string, tag, atts)
        end
    end
  end

  # Are leading and trailing "-"s ok?
  @tag_head ~r{\A \s* <([-\w]+) \s*}x

  # Extracts the tag name from the start of `string`, then parses attributes
  # and tail. Returns `{:ok | :ext, {tag, atts}, suffix}`.
  # Deliberately has no fallback clause: a line without an opening tag raises
  # `CaseClauseError`.
  defp _parse_tag(string) do
    case Regex.run(@tag_head, string) do
      [all, tag] -> _parse_atts(behead(string, all), tag, [])
    end
  end

  # Skips to the first `>`, capturing an optional `/` right before it and
  # whatever follows the tag on the same line.
  @tag_tail ~r{\A .*? (/?)> \s* (.*) \z}x

  # Handles the end of the opening tag. A closing tag of the same name on the
  # same line, and everything after it, is dropped from the suffix.
  defp _parse_tag_tail(string, tag, atts) do
    case Regex.run(@tag_tail, string) do
      [_, closing, suffix] ->
        suffix1 = String.replace(suffix, ~r{\s*</#{tag}>.*}, "")
        _close_tag_tail(tag, atts, closing != "", suffix1)
    end
  end

  # Tags that were self-closed (`/>`) or for which `void_tag?/1` returns a
  # truthy value are flagged `:ext`; all other tags are flagged `:ok`.
  # `atts` was accumulated in reverse, so it is put back into source order.
  defp _close_tag_tail(tag, atts, closing?, suffix) do
    if closing? || void_tag?(tag) do
      {:ext, {tag, Enum.reverse(atts)}, suffix}
    else
      {:ok, {tag, Enum.reverse(atts)}, suffix}
    end
  end

  # Iterate over lines inside a tag
  # -------------------------------

  @verbatim %{verbatim: true}

  # Collects the content lines of a tag (in reverse, in `lines`) and builds
  # the node once the input is exhausted. Only the very last line is checked
  # for the closing tag:
  #
  # * no closing tag   -> the line is kept as content
  # * closing tag only -> the line is dropped
  # * closing tag plus trailing text -> returns `[node, trailing_text]`
  defp _parse_rest(rest, tag_tpl, lines)

  defp _parse_rest([], tag_tpl, lines) do
    _verbatim_node(tag_tpl, lines)
  end

  defp _parse_rest([last_line], {tag, _} = tag_tpl, lines) do
    case Regex.run(~r{\A\s*</#{tag}>\s*(.*)}, last_line) do
      nil -> _verbatim_node(tag_tpl, [last_line | lines])
      [_, ""] -> _verbatim_node(tag_tpl, lines)
      [_, suffix] -> [_verbatim_node(tag_tpl, lines), suffix]
    end
  end

  defp _parse_rest([inner_line | rest], tag_tpl, lines) do
    _parse_rest(rest, tag_tpl, [inner_line | lines])
  end

  # Builds the final 4-tuple node from the `{tag, atts}` pair and the
  # reversed content lines. Written as a tuple literal instead of chained
  # `Tuple.append/2` calls.
  defp _verbatim_node({tag, atts}, reversed_lines) do
    {tag, atts, Enum.reverse(reversed_lines), @verbatim}
  end
end