zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

lexer.ex (10129B)


      1 defmodule Absinthe.Lexer do
      2   import NimbleParsec
      3 
  # Codepoints referenced by the GraphQL "Ignored" token grammar below.
  @horizontal_tab 0x0009
  @newline 0x000A
  @carriage_return 0x000D
  @space 0x0020
  @unicode_bom 0xFEFF

  # SourceCharacter :: /[\u0009\u000A\u000D\u0020-\uFFFF]/

  # Matches any single UTF-8 codepoint; the narrower classes below pick
  # out the specific characters the spec cares about.
  any_unicode = utf8_char([])

  # ## Ignored Tokens

  # UnicodeBOM :: "Byte Order Mark (U+FEFF)"
  unicode_bom = utf8_char([@unicode_bom])

  # WhiteSpace ::
  #   - "Horizontal Tab (U+0009)"
  #   - "Space (U+0020)"
  whitespace =
    ascii_char([
      @horizontal_tab,
      @space
    ])

  # LineTerminator ::
  #   - "New Line (U+000A)"
  #   - "Carriage Return (U+000D)" [ lookahead ! "New Line (U+000A)" ]
  #   - "Carriage Return (U+000D)" "New Line (U+000A)"
  # CRLF is consumed as a single terminator via the optional newline.
  line_terminator =
    choice([
      ascii_char([@newline]),
      ascii_char([@carriage_return])
      |> optional(ascii_char([@newline]))
    ])

  # Comment :: `#` CommentChar*
  # CommentChar :: SourceCharacter but not LineTerminator
  # Consumes up to (but not including) the line terminator; see
  # not_line_terminator/4 for the repeat_while predicate.
  comment =
    string("#")
    |> repeat_while(any_unicode, {:not_line_terminator, []})

  # Comma :: ,
  comma = ascii_char([?,])

  # Ampersand :: &
  ampersand = ascii_char([?&])

  # Ignored ::
  #   - UnicodeBOM
  #   - WhiteSpace
  #   - LineTerminator
  #   - Comment
  #   - Comma
  #   - Ampersand
  ignored =
    choice([
      unicode_bom,
      whitespace,
      line_terminator,
      comment,
      comma,
      ampersand
    ])
     68 
  # ## Lexical Tokens

  #   - Punctuator
  #   - Name
  #   - IntValue
  #   - FloatValue
  #   - StringValue

  # Single-character punctuators plus the three-dot spread (`...`);
  # atom_token/5 turns the matched chars into an atom-tagged token.
  punctuator =
    choice([
      ascii_char([
        ?!,
        ?$,
        ?(,
        ?),
        ?:,
        ?=,
        ?@,
        ?[,
        ?],
        ?{,
        ?|,
        ?}
      ]),
      times(ascii_char([?.]), 3)
    ])
    |> post_traverse({:atom_token, []})

  # Name :: /[_A-Za-z][_0-9A-Za-z]*/ — classification into boolean
  # literal, reserved word, or plain name happens in the post_traverse
  # callback of the same name.
  boolean_value_or_name_or_reserved_word =
    ascii_char([?_, ?A..?Z, ?a..?z])
    |> repeat(ascii_char([?_, ?0..?9, ?A..?Z, ?a..?z]))
    |> post_traverse({:boolean_value_or_name_or_reserved_word, []})

  # NegativeSign :: -
  negative_sign = ascii_char([?-])

  # Digit :: one of 0 1 2 3 4 5 6 7 8 9
  digit = ascii_char([?0..?9])

  # NonZeroDigit :: Digit but not `0`
  non_zero_digit = ascii_char([?1..?9])

  # IntegerPart ::
  #   - NegativeSign? 0
  #   - NegativeSign? NonZeroDigit Digit*
  # Note: a lone `0` ends the integer part, so leading zeros never parse.
  integer_part =
    optional(negative_sign)
    |> choice([
      ascii_char([?0]),
      non_zero_digit |> repeat(digit)
    ])

  # IntValue :: IntegerPart
  int_value =
    empty()
    |> concat(integer_part)
    |> post_traverse({:labeled_token, [:int_value]})
    126 
  # FractionalPart :: . Digit+
  fractional_part =
    ascii_char([?.])
    |> times(digit, min: 1)

  # ExponentIndicator :: one of `e` `E`
  exponent_indicator = ascii_char([?e, ?E])

  # Sign :: one of + -
  sign = ascii_char([?+, ?-])

  # ExponentPart :: ExponentIndicator Sign? Digit+
  exponent_part =
    exponent_indicator
    |> optional(sign)
    |> times(digit, min: 1)

  # FloatValue ::
  #   - IntegerPart FractionalPart
  #   - IntegerPart ExponentPart
  #   - IntegerPart FractionalPart ExponentPart
  # Choice order matters: the longest form (fraction + exponent) is tried
  # first. When there is an exponent but no fraction, fill_mantissa/5
  # injects a synthetic ".0" so every float token carries a mantissa.
  float_value =
    choice([
      integer_part |> concat(fractional_part) |> concat(exponent_part),
      integer_part |> post_traverse({:fill_mantissa, []}) |> concat(exponent_part),
      integer_part |> concat(fractional_part)
    ])
    |> post_traverse({:labeled_token, [:float_value]})
    155 
  # EscapedUnicode :: /[0-9A-Fa-f]{4}/
  # The four hex digits are decoded into a UTF-8 binary by
  # unescape_unicode/5.
  escaped_unicode =
    times(ascii_char([?0..?9, ?A..?F, ?a..?f]), 4)
    |> post_traverse({:unescape_unicode, []})

  # EscapedCharacter :: one of `"` \ `/` b f n r t
  # `"`, `\` and `/` stand for themselves; the letter escapes are
  # replaced with their control-character values.
  escaped_character =
    choice([
      ascii_char([?"]),
      ascii_char([?\\]),
      ascii_char([?/]),
      ascii_char([?b]) |> replace(?\b),
      ascii_char([?f]) |> replace(?\f),
      ascii_char([?n]) |> replace(?\n),
      ascii_char([?r]) |> replace(?\r),
      ascii_char([?t]) |> replace(?\t)
    ])

  # StringCharacter ::
  #   - SourceCharacter but not `"` or \ or LineTerminator
  #   - \u EscapedUnicode
  #   - \ EscapedCharacter
  string_character =
    choice([
      ignore(string(~S(\u))) |> concat(escaped_unicode),
      ignore(ascii_char([?\\])) |> concat(escaped_character),
      any_unicode
    ])

  # BlockStringCharacter ::
  #   - SourceCharacter but not `"""` or `\"""`
  #   - `\"""`

  # Note: Block string values are interpreted to exclude blank initial and trailing
  # lines and uniform indentation with {BlockStringValue()}.
  block_string_character =
    choice([
      ignore(ascii_char([?\\])) |> concat(times(ascii_char([?"]), 3)),
      any_unicode
    ])

  # StringValue ::
  #   - `"` StringCharacter* `"`
  #   - `"""` BlockStringCharacter* `"""`
  # mark_string_start/5 records the token's start location in the
  # context before the body is consumed; string_value_token/5 reads it
  # back when the closing quote is found.
  string_value =
    ignore(ascii_char([?"]))
    |> post_traverse({:mark_string_start, []})
    |> repeat_while(string_character, {:not_end_of_quote, []})
    |> ignore(ascii_char([?"]))
    |> post_traverse({:string_value_token, []})

  # Same pattern as string_value, with `"""` delimiters and a predicate
  # that only halts on an unescaped closing `"""`.
  block_string_value =
    ignore(string(~S(""")))
    |> post_traverse({:mark_block_string_start, []})
    |> repeat_while(block_string_character, {:not_end_of_block_quote, []})
    |> ignore(string(~S(""")))
    |> post_traverse({:block_string_value_token, []})
    213 
    214   defp not_end_of_quote(<<?", _::binary>>, context, _, _) do
    215     {:halt, context}
    216   end
    217 
    218   defp not_end_of_quote(rest, context, current_line, current_offset) do
    219     not_line_terminator(rest, context, current_line, current_offset)
    220   end
    221 
    222   defp not_end_of_block_quote(<<?", ?", ?", _::binary>>, context, _, _) do
    223     {:halt, context}
    224   end
    225 
    226   defp not_end_of_block_quote(_, context, _, _) do
    227     {:cont, context}
    228   end
    229 
    230   @spec tokenize(binary()) :: {:ok, [any()]} | {:error, binary(), {integer(), non_neg_integer()}}
    231   def tokenize(input) do
    232     lines = String.split(input, ~r/\r?\n/)
    233 
    234     case do_tokenize(input) do
    235       {:ok, tokens, "", _, _, _} ->
    236         tokens = Enum.map(tokens, &convert_token_column(&1, lines))
    237         {:ok, tokens}
    238 
    239       {:ok, _, rest, _, {line, line_offset}, byte_offset} ->
    240         byte_column = byte_offset - line_offset + 1
    241         {:error, rest, byte_loc_to_char_loc({line, byte_column}, lines)}
    242     end
    243   end
    244 
    245   defp convert_token_column({ident, loc, data}, lines) do
    246     {ident, byte_loc_to_char_loc(loc, lines), data}
    247   end
    248 
    249   defp convert_token_column({ident, loc}, lines) do
    250     {ident, byte_loc_to_char_loc(loc, lines)}
    251   end
    252 
    253   defp byte_loc_to_char_loc({line, byte_col}, lines) do
    254     current_line = Enum.at(lines, line - 1)
    255     byte_prefix = binary_part(current_line, 0, byte_col)
    256     char_col = String.length(byte_prefix)
    257     {line, char_col}
    258   end
    259 
  # Entry parsec: repeatedly consumes either ignored input or one lexical
  # token until input is exhausted. Choice order matters — `ignored`
  # first, and longer literals (block strings, floats) before their
  # shorter prefixes (strings, ints).
  @spec do_tokenize(binary()) ::
          {:ok, [any()], binary(), map(), {pos_integer(), pos_integer()}, pos_integer()}
  defparsec(
    :do_tokenize,
    repeat(
      choice([
        ignore(ignored),
        # NOTE(review): `ignored` above already matches comments, so this
        # bare `comment` branch looks unreachable — confirm before removing.
        comment,
        punctuator,
        block_string_value,
        string_value,
        float_value,
        int_value,
        boolean_value_or_name_or_reserved_word
      ])
    )
  )
    277 
    278   defp fill_mantissa(_rest, raw, context, _, _), do: {'0.' ++ raw, context}
    279 
    280   defp unescape_unicode(_rest, content, context, _loc, _) do
    281     code = content |> Enum.reverse()
    282     value = :erlang.list_to_integer(code, 16)
    283     binary = :unicode.characters_to_binary([value])
    284     {[binary], context}
    285   end
    286 
  # Boolean literals as charlists — the lexer accumulates charlists, so
  # guard comparisons below avoid any conversion.
  @boolean_words ~w(
    true
    false
  ) |> Enum.map(&String.to_charlist/1)

  # Keywords that get their own token tag (see
  # do_boolean_value_or_name_or_reserved_word/5).
  # NOTE(review): both `on` and `ON` are listed — presumably deliberate,
  # but confirm the uppercase variant is really required.
  @reserved_words ~w(
    directive
    enum
    extend
    fragment
    implements
    input
    interface
    mutation
    null
    on
    ON
    query
    repeatable
    scalar
    schema
    subscription
    type
    union
  ) |> Enum.map(&String.to_charlist/1)
    312 
    313   defp boolean_value_or_name_or_reserved_word(rest, chars, context, loc, byte_offset) do
    314     value = chars |> Enum.reverse()
    315     do_boolean_value_or_name_or_reserved_word(rest, value, context, loc, byte_offset)
    316   end
    317 
    318   defp do_boolean_value_or_name_or_reserved_word(_rest, value, context, loc, byte_offset)
    319        when value in @boolean_words do
    320     {[{:boolean_value, line_and_column(loc, byte_offset, length(value)), value}], context}
    321   end
    322 
    323   defp do_boolean_value_or_name_or_reserved_word(_rest, value, context, loc, byte_offset)
    324        when value in @reserved_words do
    325     token_name = value |> List.to_atom()
    326     {[{token_name, line_and_column(loc, byte_offset, length(value))}], context}
    327   end
    328 
    329   defp do_boolean_value_or_name_or_reserved_word(_rest, value, context, loc, byte_offset) do
    330     {[{:name, line_and_column(loc, byte_offset, length(value)), value}], context}
    331   end
    332 
    333   defp labeled_token(_rest, chars, context, loc, byte_offset, token_name) do
    334     value = chars |> Enum.reverse()
    335     {[{token_name, line_and_column(loc, byte_offset, length(value)), value}], context}
    336   end
    337 
    338   defp mark_string_start(_rest, chars, context, loc, byte_offset) do
    339     {[chars], Map.put(context, :token_location, line_and_column(loc, byte_offset, 1))}
    340   end
    341 
    342   defp mark_block_string_start(_rest, _chars, context, loc, byte_offset) do
    343     {[], Map.put(context, :token_location, line_and_column(loc, byte_offset, 3))}
    344   end
    345 
    346   defp block_string_value_token(_rest, chars, context, _loc, _byte_offset) do
    347     value = '"""' ++ (chars |> Enum.reverse()) ++ '"""'
    348     {[{:block_string_value, context.token_location, value}], Map.delete(context, :token_location)}
    349   end
    350 
    351   defp string_value_token(_rest, chars, context, _loc, _byte_offset) do
    352     value = '"' ++ tl(chars |> Enum.reverse()) ++ '"'
    353     {[{:string_value, context.token_location, value}], Map.delete(context, :token_location)}
    354   end
    355 
    356   defp atom_token(_rest, chars, context, loc, byte_offset) do
    357     value = chars |> Enum.reverse()
    358     token_atom = value |> List.to_atom()
    359     {[{token_atom, line_and_column(loc, byte_offset, length(value))}], context}
    360   end
    361 
    362   def line_and_column({line, line_offset}, byte_offset, column_correction) do
    363     column = byte_offset - line_offset - column_correction + 1
    364     {line, column}
    365   end
    366 
    367   defp not_line_terminator(<<?\n, _::binary>>, context, _, _), do: {:halt, context}
    368   defp not_line_terminator(<<?\r, _::binary>>, context, _, _), do: {:halt, context}
    369   defp not_line_terminator(_, context, _, _), do: {:cont, context}
    370 end