zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

html_parser.ex (2791B)


      defmodule EarmarkParser.Helpers.HtmlParser do

        @moduledoc false

        # Turns the lines of a raw HTML block into a "verbatim" node of the shape
        #
        #     {tag, atts, lines, %{verbatim: true}}
        #
        # where `atts` is a list of `{name, value}` string pairs in source order
        # and `lines` are the untouched inner lines of the block.

        import EarmarkParser.Helpers.StringHelpers, only: [behead: 2]
        import EarmarkParser.LineScanner, only: [void_tag?: 1]

        # Entry point: the first line must contain the opening tag, the remaining
        # lines are the body of the block.
        #
        # The result is always a list whose first element comes from
        # `_parse_rest/3`:
        #
        #   * open tag (`:ok`): text after the tag on the first line becomes the
        #     first inner line of the node;
        #   * self-closing or void tag (`:ext`): text after the tag is NOT part
        #     of the node and is returned next to it as `[suffix]`.
        #
        # An empty list of lines is not handled and raises `FunctionClauseError`.
        def parse_html(lines)
        def parse_html([tag_line | rest]) do
          case _parse_tag(tag_line) do
            {:ok, tag, ""} -> [_parse_rest(rest, tag, [])]
            {:ok, tag, suffix} -> [_parse_rest(rest, tag, [suffix])]
            {:ext, tag, ""} -> [_parse_rest(rest, tag, [])]
            {:ext, tag, suffix} -> [_parse_rest(rest, tag, []), [suffix]]
          end
        end

        # Parse One Tag
        # -------------

        # name = "value" or name = 'value'; the closing delimiter must equal the
        # opening one (backreference \2).
        @quoted_attr ~r{\A ([-\w]+) \s* = \s* (["']) (.*?) \2 \s*}x
        # name or name = value. The value must stop in front of `>`; otherwise
        # `<div class=foo>` would yield the value `foo>` and leave no `>` behind
        # for @tag_tail to match, which crashed the parser.
        @unquoted_attr ~r{\A ([-\w]+) (?: \s* = \s* ([^&\s>]*))? \s*}x

        # Consumes attributes from the front of `string` one at a time,
        # accumulating them in reverse order in `atts`. As soon as neither
        # attribute form matches, the remainder is handed to `_parse_tag_tail/3`.
        # NOTE(review): `behead/2` is assumed to drop the matched prefix `all`
        # from `string` - confirm against StringHelpers.
        defp _parse_atts(string, tag, atts) do
          # The quoted form is tried first; the unquoted one is only a fallback.
          case Regex.run(@quoted_attr, string) || Regex.run(@unquoted_attr, string) do
            [all, name, _delim, value] ->
              _parse_atts(behead(string, all), tag, [{name, value} | atts])

            [all, name, value] ->
              _parse_atts(behead(string, all), tag, [{name, value} | atts])

            # A bare attribute (no `=`) gets its own name as value.
            [all, name] ->
              _parse_atts(behead(string, all), tag, [{name, name} | atts])

            _ ->
              _parse_tag_tail(string, tag, atts)
          end
        end

        # Are leading and trailing "-"s ok?
        @tag_head ~r{\A \s* <([-\w]+) \s*}x

        # Extracts the tag name from the start of `string` and continues with its
        # attributes. Raises `CaseClauseError` if `string` does not start with an
        # opening tag.
        defp _parse_tag(string) do
          case Regex.run(@tag_head, string) do
            [all, tag] -> _parse_atts(behead(string, all), tag, [])
          end
        end

        # Skips anything left before the closing `>` and captures an optional
        # `/` right in front of it as well as the text following the tag.
        @tag_tail ~r{\A .*? (/?)> \s* (.*) \z}x

        # Finishes the opening tag. Raises `CaseClauseError` if `string` contains
        # no `>`.
        defp _parse_tag_tail(string, tag, atts) do
          case Regex.run(@tag_tail, string) do
            [_, closing, suffix] ->
              # A closing tag on the same line, and everything after it, is
              # removed from the suffix. `tag` only contains [-\w] characters
              # (see @tag_head), so it can be interpolated unescaped.
              suffix1 = String.replace(suffix, ~r{\s*</#{tag}>.*}, "")
              _close_tag_tail(tag, atts, closing != "", suffix1)
          end
        end

        # Tags the result with `:ext` for self-closing (`/>`) or void tags and
        # with `:ok` otherwise; attributes are put back into source order.
        defp _close_tag_tail(tag, atts, closing?, suffix) do
          if closing? || void_tag?(tag) do
            {:ext, {tag, Enum.reverse(atts)}, suffix}
          else
            {:ok, {tag, Enum.reverse(atts)}, suffix}
          end
        end

        # Iterate over lines inside a tag
        # -------------------------------

        @verbatim %{verbatim: true}

        # Collects the inner lines (in reverse order in `lines`) and builds the
        # node. Only the very last line is checked for the closing tag:
        #
        #   * not a closing tag    -> it is kept as an inner line;
        #   * closing tag only     -> it is dropped;
        #   * closing tag + text   -> `[node, text]` is returned instead of the
        #                             bare node.
        defp _parse_rest(rest, tag_tpl, lines)
        defp _parse_rest([], tag_tpl, lines), do: _verbatim_node(tag_tpl, lines)
        defp _parse_rest([last_line], {tag, _} = tag_tpl, lines) do
          case Regex.run(~r{\A\s*</#{tag}>\s*(.*)}, last_line) do
            nil -> _verbatim_node(tag_tpl, [last_line | lines])
            [_, ""] -> _verbatim_node(tag_tpl, lines)
            [_, suffix] -> [_verbatim_node(tag_tpl, lines), suffix]
          end
        end
        defp _parse_rest([inner_line | rest], tag_tpl, lines) do
          _parse_rest(rest, tag_tpl, [inner_line | lines])
        end

        # Builds the final 4-tuple from the `{tag, atts}` pair and the reversed
        # line accumulator. Written as a tuple literal rather than with
        # `Tuple.append/2`, which is deprecated in recent Elixir versions.
        defp _verbatim_node({tag, atts}, reversed_lines) do
          {tag, atts, Enum.reverse(reversed_lines), @verbatim}
        end

      end