zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

combinators.ex (7288B)


      1 defmodule Makeup.Lexer.Combinators do
      2   @moduledoc """
      3   Common components useful in many lexers.
      4   """
      5   import NimbleParsec
      6 
      7   @doc """
      8   Wraps the given combinator into a token of the given `ttype`.
      9 
     10   Instead of a combinator, the first argument can also be a string literal.
     11   """
     12   def token(literal, token_type) when is_binary(literal) do
     13     replace(string(literal), {token_type, %{}, literal})
     14   end
     15 
     16   def token(combinator, token_type) do
     17     combinator |> post_traverse({__MODULE__, :__token__, [token_type]})
     18   end
     19 
     20   def token(literal, token_type, attrs) when is_binary(literal) and is_map(attrs) do
     21     replace(string(literal), {token_type, attrs, literal})
     22   end
     23 
     24   def token(combinator, token_type, attrs) when is_map(attrs) do
     25     combinator |> post_traverse({__MODULE__, :__token__, [token_type, attrs]})
     26   end
     27 
     28   @doc """
     29   Joins the result of the given combinator into a single string.
     30 
     31   This is not usually necessary, but it can be useful if you want to match on the tokens.
     32   It's easier to match on the token `{:keyword, %{}, "unquote"}` than on something like
     33   `{:keyword, %{}, ["u", "nquote"]}`, even though both tokens will be treated the same way
     34   by the formatter.
     35   """
     36   def lexeme(combinator) do
     37     combinator |> post_traverse({__MODULE__, :__lexeme__, []})
     38   end
     39 
     40   @doc false
     41   def __token__(rest, [arg], context, _line, _offset, token_type) do
     42     {rest, [{token_type, %{}, arg}], context}
     43   end
     44 
     45   def __token__(rest, arg, context, _line, _offset, token_type) when is_binary(arg) do
     46     {rest, [{token_type, %{}, arg}], context}
     47   end
     48 
     49   def __token__(rest, args, context, _line, _offset, token_type) do
     50     {rest, [{token_type, %{}, args |> :lists.reverse()}], context}
     51   end
     52 
     53   @doc false
     54   def __token__(rest, [arg], context, _line, _offset, token_type, attrs) do
     55     {rest, [{token_type, attrs, arg}], context}
     56   end
     57 
     58   def __token__(rest, arg, context, _line, _offset, token_type, attrs) when is_binary(arg) do
     59     {rest, [{token_type, attrs, arg}], context}
     60   end
     61 
     62   def __token__(rest, args, context, _line, _offset, token_type, attrs) do
     63     {rest, [{token_type, attrs, args |> :lists.reverse()}], context}
     64   end
     65 
     66   @doc false
     67   def __lexeme__(rest, args, context, _line, _offset) do
     68     result = args |> List.wrap() |> :lists.reverse() |> to_string()
     69     {rest, [result], context}
     70   end
     71 
     72   defp reverse_sort(items) do
     73     Enum.sort(items, fn a, b -> {byte_size(a), a} > {byte_size(b), b} end)
     74   end
     75 
     76   @doc """
     77   Matches one of the literal strings in the list.
     78 
     79   The strings aren't matched in order: they are automatically sorted in a way
     80   that guarantees that the longest strings will be tried first.
     81 
     82   ## Examples
     83 
     84       keywords = word_from_list(~w[do end catch after rescue])
     85   """
     86   def word_from_list(words) do
     87     choice(for word <- reverse_sort(words), do: string(word))
     88   end
     89 
     90   @doc """
     91   Matches one of the literal strings in the list and wraps it in a token of the given type.
     92 
     93   This is is just a shorthand.
     94 
     95   The strings aren't matched in order: they are automatically sorted in a way
     96   that guarantees that the longest strings will be tried first.
     97 
     98   ## Examples
     99 
    100       keywords = word_from_list(~w[do end catch after rescue], :keyword)
    101   """
    102   def word_from_list(words, ttype) do
    103     choice(for word <- reverse_sort(words), do: string(word)) |> token(ttype)
    104   end
    105 
    106   @doc """
    107   Matches one of the literal strings in the list and wraps it in a token of the given `type`,
    108   with the given `attrs`.
    109 
    110   This is is just a shorthand.
    111 
    112   The strings aren't matched in order: they are automatically sorted in a way
    113   that guarantees that the longest strings will be tried first.
    114   """
    115   def word_from_list(words, ttype, attrs) do
    116     choice(for word <- reverse_sort(words), do: string(word)) |> token(ttype, attrs)
    117   end
    118 
    119   @doc """
    120   Matches a given combinator, repeated 0 or more times, surrounded by left and right delimiters.
    121 
    122   Delimiters can be combinators or literal strings (either both combinators or both literal strings).
    123   """
    124   def many_surrounded_by(combinator, left, right) when is_binary(left) and is_binary(right) do
    125     token(left, :punctuation)
    126     |> concat(
    127       repeat(
    128         lookahead_not(string(right))
    129         |> concat(combinator)
    130       )
    131     )
    132     |> concat(token(right, :punctuation))
    133   end
    134 
    135   def many_surrounded_by(combinator, left, right) do
    136     left
    137     |> concat(
    138       repeat(
    139         lookahead_not(right)
    140         |> concat(combinator)
    141       )
    142     )
    143     |> concat(right)
    144   end
    145 
    146   @doc """
    147   Matches a given combinator, repeated 0 or more times, surrounded by left and right delimiters,
    148   and wraps the `right` and `left` delimiters into a token of the given `ttype`.
    149   """
    150   def many_surrounded_by(combinator, left, right, ttype) do
    151     token(left, ttype)
    152     |> concat(
    153       repeat(
    154         lookahead_not(string(right))
    155         |> concat(combinator)
    156       )
    157     )
    158     |> concat(token(right, ttype))
    159   end
    160 
    161   @doc false
    162   def collect_raw_chars_and_binaries(rest, args, context, _line, _offset, ttype, attrs) do
    163     result = merge_chars_helper(ttype, attrs, [], args)
    164     {rest, result, context}
    165   end
    166 
    167   defp merge_chars_helper(_ttype, _attrs, [], []), do: []
    168 
    169   defp merge_chars_helper(ttype, attrs, acc, [next | rest])
    170        when is_integer(next) or is_binary(next) do
    171     merge_chars_helper(ttype, attrs, [next | acc], rest)
    172   end
    173 
    174   defp merge_chars_helper(ttype, attrs, [], [element | rest]) do
    175     [element | merge_chars_helper(ttype, attrs, [], rest)]
    176   end
    177 
    178   defp merge_chars_helper(ttype, attrs, acc, list) do
    179     tok = {ttype, attrs, acc}
    180     [tok | merge_chars_helper(ttype, attrs, [], list)]
    181   end
    182 
    183   @doc """
    184   A generic combinator for string-like syntactic structures.
    185 
    186   It takes the following parameters:
    187 
    188     * `left` - left delimiter for the string. Can be a binary or a general combinator.
    189     * `right` - right delimiter for the string. Can be a binary or a general combinator
    190     * `middle` - a list of parsers to run inside the string which parse entities
    191       that aren't characters.
    192       The most common example are special characters and string interpolation
    193       for languages that support it like Elixir.
    194     * `ttype` - the token type to use for the string delimiters and ordinary characters
    195       (tokens parsd by the )
    196     * `attrs` - metadata attributes for the string delimiters and ordinary characters
    197 
    198   ## Examples
    199 
    200       single_quoted_heredocs = string_like(
    201         "'''",
    202         "'''",
    203         combinators_inside_string,
    204         :string_char
    205       )
    206 
    207   The above is equivalent to the following more explicit version:
    208 
    209       single_quoted_heredocs = string_like(
    210         string("'''"),
    211         string("'''"),
    212         combinators_inside_string,
    213         :string_char
    214       )
    215   """
    216   def string_like(left, right, middle, ttype, attrs \\ %{}) when is_list(middle) do
    217     left_combinator =
    218       case is_binary(left) do
    219         true -> string(left)
    220         false -> left
    221       end
    222 
    223     right_combinator =
    224       case is_binary(right) do
    225         true -> string(right)
    226         false -> right
    227       end
    228 
    229     choices = middle ++ [utf8_char([])]
    230 
    231     left_combinator
    232     |> repeat(lookahead_not(right_combinator) |> choice(choices))
    233     |> concat(right_combinator)
    234     |> post_traverse({__MODULE__, :collect_raw_chars_and_binaries, [ttype, attrs]})
    235   end
    236 end