zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git

lexer.ex (2839B)


defmodule Makeup.Lexer do
  @moduledoc """
  A lexer turns raw source code into a list of tokens.
  """
  alias Makeup.Lexer.Types, as: T
  alias Makeup.Lexer.Postprocess

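  # Tokens are three-element tuples: a type tag, a metadata map, and the
  # matched text, e.g. `{:keyword, %{}, "def"}` (this shape can be read off
  # the pattern matches in `unlex/1` and `merge/1` below).
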
  @doc """
  Parses the smallest number of tokens that make sense.
  It's a `parsec`.
  """
  @callback root_element(String.t()) :: T.parsec_result

  @doc """
  Parses the given string into a `parsec` result that includes a list of tokens.
  """
  @callback root(String.t()) :: T.parsec_result

  @doc """
  Post-processes a list of tokens before matching the contained groups.
  """
  @callback postprocess([T.token()], list()) :: [T.token()]

  @doc """
  Matches groups in a list of tokens.
  """
  @callback match_groups([T.token()], String.t()) :: [T.token()]
  @doc """
  Lexes a string into a list of tokens.
  """
  @callback lex(String.t(), list()) :: [T.token()]
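
  # Together these five callbacks make up the behaviour a Makeup lexer
  # implements. A rough, hypothetical sketch (parsec definitions elided,
  # `MyLexer` is not a real module):
  #
  #     defmodule MyLexer do
  #       @behaviour Makeup.Lexer
  #
  #       # root/1 and root_element/1 would typically be generated with
  #       # NimbleParsec's defparsec/2 and return a T.parsec_result.
  #
  #       @impl true
  #       def lex(text, opts \\ []) do
  #         {:ok, tokens, "", _, _, _} = root(text)
  #         tokens
  #         |> postprocess(opts)
  #         |> match_groups("group-prefix")
  #       end
  #     end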

  @doc """
  Merges the token values into the original string.

  Inverts the output of a lexer. That is, if `lexer` is a lexer, then:

      string |> lexer.lex() |> Makeup.Lexer.unlex() == string

  This only works for a correctly implemented lexer, of course.
  The above identity can be treated as a lexer invariant for newly implemented lexers.
  """
  @spec unlex(list(T.token())) :: String.t()
  def unlex(tokens) do
    tokens
    |> Enum.map(&Postprocess.token_value_to_binary/1)
    |> Enum.map(fn {_tag, _meta, value} -> value end)
    |> Enum.join()
  end
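
  # For example, with illustrative hand-written tokens (any real lexer's
  # output works the same way):
  #
  #     unlex([{:keyword, %{}, "def"}, {:whitespace, %{}, " "}, {:name, %{}, "foo"}])
  #     #=> "def foo"
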
  @doc """
  Splits a list of tokens on newline characters (`\\n`).

  The result is a list of lists of tokens with no newlines.
  """
  @spec split_into_lines(list(T.token())) :: list(list(T.token()))
  def split_into_lines(tokens) do
    # Accumulate completed lines plus the currently open line; both are
    # built newest-first, so completed lines are reversed as they are
    # closed and the outer list is reversed again at the end.
    {lines, last_line} =
      Enum.reduce(tokens, {[], []}, fn token, {lines, line} ->
        {ttype, meta, text} = Postprocess.token_value_to_binary(token)

        case String.split(text, "\n") do
          # No newline inside this token: it stays on the current line.
          [_] ->
            {lines, [token | line]}

          # One or more newlines: the first part closes the current line,
          # every middle part becomes a line of its own, and the last part
          # opens the next line.
          [part | parts] ->
            first_line = [{ttype, meta, part} | line] |> :lists.reverse()

            all_but_last_line =
              parts
              |> Enum.slice(0..-2)
              |> Enum.map(fn tok_text -> [{ttype, meta, tok_text}] end)
              |> :lists.reverse()

            last_line = [{ttype, meta, Enum.at(parts, -1)}]

            {all_but_last_line ++ [first_line | lines], last_line}
        end
      end)

    # The still-open last line is also in reverse token order, so it must
    # be flipped along with the outer list of lines.
    :lists.reverse([:lists.reverse(last_line) | lines])
  end
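
  # For example, with illustrative tokens:
  #
  #     split_into_lines([{:string, %{}, "a\nb"}, {:punctuation, %{}, ";"}])
  #     #=> [
  #     #=>   [{:string, %{}, "a"}],
  #     #=>   [{:string, %{}, "b"}, {:punctuation, %{}, ";"}]
  #     #=> ]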

  @doc """
  Merges adjacent tokens of the same type and with the same attributes.

  Doing this will require iterating over the list of tokens again,
  so only do this if you have a good reason.
  """
  @spec merge(list(T.token())) :: list(T.token())
  # Same tag and metadata: concatenate the values and retry, so that runs
  # of more than two mergeable tokens collapse into one.
  def merge([{tag, meta, value1}, {tag, meta, value2} | rest]),
    do: merge([{tag, meta, value1 <> value2} | rest])
  def merge([token | rest]),
    do: [token | merge(rest)]
  def merge([]),
    do: []
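
  # For example, with illustrative tokens:
  #
  #     merge([{:text, %{}, "foo"}, {:text, %{}, "bar"}, {:name, %{}, "x"}])
  #     #=> [{:text, %{}, "foobar"}, {:name, %{}, "x"}]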
end