nimble_parsec.ex (65044B)
1 defmodule NimbleParsec do 2 @moduledoc "README.md" 3 |> File.read!() 4 |> String.split("<!-- MDOC !-->") 5 |> Enum.fetch!(1) 6 7 defmacrop is_combinator(combinator) do 8 quote do 9 is_list(unquote(combinator)) 10 end 11 end 12 13 @doc """ 14 Defines a parser (and a combinator) with the given `name` and `opts`. 15 16 The parser is a function that receives two arguments, the binary 17 to be parsed and a set of options. You can consult the documentation 18 of the generated parser function for more information. 19 20 This function will also define a combinator that can be used as 21 `parsec(name)` when building other parsers. See `parsec/2` for 22 more information on invoking compiled combinators. 23 24 ## Beware! 25 26 `defparsec/3` is executed during compilation. This means you can't 27 invoke a function defined in the same module. The following will error 28 because the `date` function has not yet been defined: 29 30 defmodule MyParser do 31 import NimbleParsec 32 33 def date do 34 integer(4) 35 |> ignore(string("-")) 36 |> integer(2) 37 |> ignore(string("-")) 38 |> integer(2) 39 end 40 41 defparsec :date, date() 42 end 43 44 This can be solved in different ways. You may simply 45 compose a long parser using variables. 
For example: 46 47 defmodule MyParser do 48 import NimbleParsec 49 50 date = 51 integer(4) 52 |> ignore(string("-")) 53 |> integer(2) 54 |> ignore(string("-")) 55 |> integer(2) 56 57 defparsec :date, date 58 end 59 60 Alternatively, you may define a `Helpers` module with many 61 convenience combinators, and then invoke them in your parser 62 module: 63 64 defmodule MyParser.Helpers do 65 import NimbleParsec 66 67 def date do 68 integer(4) 69 |> ignore(string("-")) 70 |> integer(2) 71 |> ignore(string("-")) 72 |> integer(2) 73 end 74 end 75 76 defmodule MyParser do 77 import NimbleParsec 78 import MyParser.Helpers 79 80 defparsec :date, date() 81 end 82 83 The approach of using helper modules is the favorite way 84 of composing parsers in `NimbleParsec`. 85 86 ## Options 87 88 * `:inline` - when true, inlines clauses that work as redirection for 89 other clauses. It is disabled by default because of a bug in Elixir 90 v1.5 and v1.6 where unused functions that are inlined cause a 91 compilation error 92 93 * `:debug` - when true, writes generated clauses to `:stderr` for debugging 94 95 * `:export_combinator` - make the underlying combinator function public 96 so it can be used as part of `parsec/1` from other modules 97 98 * `:export_metadata` - export metadata necessary to use this parser 99 combinator to generate inputs 100 101 """ 102 defmacro defparsec(name, combinator, opts \\ []) do 103 visibility = quote do 104 if opts[:export_combinator], do: :def, else: :defp 105 end 106 107 compile(:def, visibility, name, combinator, opts) 108 end 109 110 @doc """ 111 Defines a private parser (and a combinator) with the given `name` and `opts`. 112 113 The same as `defparsec/3` but the parsing function is private. 114 """ 115 defmacro defparsecp(name, combinator, opts \\ []) do 116 compile(:defp, :defp, name, combinator, opts) 117 end 118 119 @doc """ 120 Defines a combinator with the given `name` and `opts`. 
121 122 It is similar to `defparsec/3` except it does not define 123 an entry-point parsing function, just the combinator function 124 to be used with `parsec/2`. 125 """ 126 defmacro defcombinator(name, combinator, opts \\ []) do 127 compile(nil, :def, name, combinator, opts) 128 end 129 130 @doc """ 131 Defines a combinator with the given `name` and `opts`. 132 133 It is similar to `defparsecp/3` except it does not define 134 an entry-point parsing function, just the combinator function 135 to be used with `parsec/2`. 136 """ 137 defmacro defcombinatorp(name, combinator, opts \\ []) do 138 compile(nil, :defp, name, combinator, opts) 139 end 140 141 defp compile(parser_kind, combinator_kind, name, combinator, opts) do 142 prelude = 143 quote do 144 opts = unquote(opts) 145 combinator_kind = unquote(combinator_kind) 146 end 147 148 combinator = 149 quote bind_quoted: [ 150 parser_kind: parser_kind, 151 name: name, 152 combinator: combinator, 153 ] do 154 {defs, inline} = NimbleParsec.Compiler.compile(name, combinator, opts) 155 156 NimbleParsec.Recorder.record( 157 __MODULE__, 158 parser_kind, 159 combinator_kind, 160 name, 161 defs, 162 inline, 163 opts 164 ) 165 166 if opts[:export_metadata] do 167 def __nimble_parsec__(unquote(name)), 168 do: unquote(combinator |> Enum.reverse() |> Macro.escape()) 169 end 170 171 if inline != [] do 172 @compile {:inline, inline} 173 end 174 175 if combinator_kind == :def do 176 for {name, args, guards, body} <- defs do 177 def unquote(name)(unquote_splicing(args)) when unquote(guards), do: unquote(body) 178 end 179 else 180 for {name, args, guards, body} <- defs do 181 defp unquote(name)(unquote_splicing(args)) when unquote(guards), do: unquote(body) 182 end 183 end 184 end 185 186 parser = compile_parser(name, parser_kind) 187 188 quote do 189 unquote(prelude) 190 unquote(parser) 191 unquote(combinator) 192 end 193 end 194 195 defp compile_parser(_name, nil) do 196 :ok 197 end 198 199 defp compile_parser(name, :def) do 200 
quote bind_quoted: [name: name] do 201 {doc, spec, {name, args, guards, body}} = NimbleParsec.Compiler.entry_point(name) 202 Module.get_attribute(__MODULE__, :doc) || @doc doc 203 @spec unquote(spec) 204 def unquote(name)(unquote_splicing(args)) when unquote(guards), do: unquote(body) 205 end 206 end 207 208 defp compile_parser(name, :defp) do 209 quote bind_quoted: [name: name] do 210 {_doc, spec, {name, args, guards, body}} = NimbleParsec.Compiler.entry_point(name) 211 @spec unquote(spec) 212 defp unquote(name)(unquote_splicing(args)) when unquote(guards), do: unquote(body) 213 end 214 end 215 216 @opaque t :: [combinator] 217 @type bin_modifier :: :integer | :utf8 | :utf16 | :utf32 218 @type range :: inclusive_range | exclusive_range 219 @type inclusive_range :: Range.t() | char 220 @type exclusive_range :: {:not, Range.t()} | {:not, char} 221 @type min_and_max :: {:min, non_neg_integer} | {:max, pos_integer} 222 @type call :: mfargs | fargs | atom 223 @type mfargs :: {module, atom, args :: [term]} 224 @type fargs :: {atom, args :: [term]} 225 @type gen_times :: Range.t() | non_neg_integer | nil 226 @type gen_weights :: [pos_integer] | nil 227 @type opts :: Keyword.t() 228 229 # Steps to add a new combinator: 230 # 231 # 1. Update the combinator type below 232 # 2. Update the compiler with combinator 233 # 3. 
Update the compiler with label step 234 # 235 @typep combinator :: bound_combinator | maybe_bound_combinator | unbound_combinator 236 237 @typep bound_combinator :: 238 {:bin_segment, [inclusive_range], [exclusive_range], bin_modifier} 239 | {:string, binary} 240 | :eos 241 242 @typep maybe_bound_combinator :: 243 {:label, t, binary} 244 | {:traverse, t, :pre | :post | :constant, [mfargs]} 245 246 @typep unbound_combinator :: 247 {:choice, [t], gen_weights} 248 | {:eventually, t} 249 | {:lookahead, t, :positive | :negative} 250 | {:parsec, atom | {module, atom}} 251 | {:repeat, t, mfargs, gen_times} 252 | {:times, t, pos_integer} 253 254 @doc ~S""" 255 Generate a random binary from the given parsec. 256 257 Let's see an example: 258 259 import NimbleParsec 260 generate(choice([string("foo"), string("bar")])) 261 262 The command above will return either "foo" or "bar". `generate/1` 263 is often used with pre-defined parsecs. In this case, the 264 `:export_metadata` flag must be set: 265 266 defmodule SomeModule do 267 import NimbleParsec 268 defparsec :parse, 269 choice([string("foo"), string("bar")]), 270 export_metadata: true 271 end 272 273 # Reference the parsec and generate from it 274 NimbleParsec.parsec({SomeModule, :parse}) 275 |> NimbleParsec.generate() 276 |> IO.puts() 277 278 `generate/1` can often run forever for recursive algorithms. 279 Read the notes below and make use of the `gen_weight` and `gen_times` 280 option to certain parsecs to control the recursion depth. 281 282 ## Notes 283 284 This feature is currently experimental and may change in many ways. 285 Overall, there is no guarantee over the generated output, except 286 that it will generate a binary that is parseable by the parsec 287 itself, but even this guarantee may be broken by parsers that have 288 custom validations. 
Keep in mind the following: 289 290 * `generate/1` is not compatible with NimbleParsec's dumped via 291 `mix nimble_parsec.compile`; 292 293 * `parsec/2` requires the referenced parsec to set `export_metadata: true` 294 on its definition; 295 296 * `choice/2` will be generated evenly. You can pass `:gen_weights` 297 as a list of positive integer weights to balance your choices. 298 This is particularly important for recursive algorithms; 299 300 * `repeat/2` and `repeat_while/3` will repeat between 0 and 3 times unless 301 a `:gen_times` option is given to these operations. `times/3` without a `:max` 302 will also additionally repeat between 0 and 3 times unless `:gen_times` is given. 303 The `:gen_times` option can either be an integer as the number of times to 304 repeat or a range where a random value in the range will be picked; 305 306 * `eventually/2` always generates the eventually parsec immediately; 307 308 * `lookahead/2` and `lookahead_not/2` are simply discarded; 309 310 * Validations done in any of the traverse definitions are not taken into account 311 by the generator. 
Therefore, if a parsec does validations, the generator may
generate binaries invalid to said parsec;

"""
@spec generate(t) :: binary
def generate(parsecs) do
  # Combinators are built by prepending, so restore parse order before walking them.
  {iodata, _aborted?} =
    parsecs
    |> Enum.reverse()
    |> generate(nil, [])

  IO.iodata_to_binary(iodata)
end

# generate/3 walks the combinators in parse order and returns `{iodata, aborted?}`.
# `mod` is the module used to resolve local `parsec(name)` references; it is `nil`
# at the top level, where a local reference cannot be resolved.
defp generate([{:parsec, fun} | _parsecs], nil, _acc) when is_atom(fun) do
  raise "cannot generate parsec(#{inspect(fun)}), use a remote parsec instead"
end

# A local reference inside an exported parsec is rewritten as a remote one.
defp generate([{:parsec, fun} | parsecs], mod, acc) when is_atom(fun) do
  generate([{:parsec, {mod, fun}} | parsecs], mod, acc)
end

# Remote reference: generate from the exported metadata, resolving nested local
# references against the referenced module.
defp generate([{:parsec, {mod, fun}} | outer_parsecs], outer_mod, acc) do
  {gen, _aborted?} = generate(gen_export(mod, fun), mod, [])
  generate(outer_parsecs, outer_mod, [gen | acc])
end

defp generate([{:string, string} | parsecs], mod, acc) do
  generate(parsecs, mod, [string | acc])
end

defp generate([{:bin_segment, inclusive, exclusive, modifier} | parsecs], mod, acc) do
  codepoint = gen_bin_segment(inclusive, exclusive)

  # `:integer` segments are emitted as a raw byte; the others are encoded.
  gen =
    case modifier do
      :integer -> codepoint
      _ -> :unicode.characters_to_binary([codepoint], :unicode, modifier)
    end

  generate(parsecs, mod, [gen | acc])
end

# `:eos` is only valid as the last combinator.
defp generate([:eos], mod, acc), do: generate([], mod, acc)

defp generate([:eos | _parsecs], _mod, _acc) do
  raise ArgumentError, "found :eos not at the end of parsecs"
end

# Traversals and labels do not affect the generated input: inline the nested combinators.
defp generate([{:traverse, t, _, _} | parsecs], mod, acc) do
  generate(t ++ parsecs, mod, acc)
end

defp generate([{:label, t, _} | parsecs], mod, acc) do
  generate(t ++ parsecs, mod, acc)
end

# `eventually/2` is documented above as generating the nested parsec immediately,
# but had no clause here and raised FunctionClauseError.
# NOTE(review): assumes the nested combinator is stored in parse order, as
# `label/3` and `lookahead/2` do - confirm against `eventually/2`.
defp generate([{:eventually, t} | parsecs], mod, acc) do
  generate(t ++ parsecs, mod, acc)
end

defp generate([{:choice, choices, weights} | parsecs], mod, acc) do
  pick = if weights, do: weighted_random(choices, weights), else: list_random(choices)
  {gen, _aborted?} = generate(pick, mod, [])
  generate(parsecs, mod, [gen | acc])
end

# Lookaheads consume no input, so they are discarded.
defp generate([{:lookahead, _, _} | parsecs], mod, acc) do
  generate(parsecs, mod, acc)
end

# Repeats the nested combinator `gen` times (see int_random/1 for the default).
defp generate([{:repeat, t, _, gen} | parsecs], mod, acc) do
  generate(parsecs, mod, gen_times(t, int_random(gen), mod, acc))
end

# Repeats the nested combinator a random number of times between 0 and `max`.
defp generate([{:times, t, max} | parsecs], mod, acc) do
  generate(parsecs, mod, gen_times(t, Enum.random(0..max), mod, acc))
end

# Done: the accumulator was built by prepending, so put it back in order.
defp generate([], _mod, acc), do: {Enum.reverse(acc), false}

# Fetches the combinator metadata exported by `mod` for the parsec `fun`.
#
# Raises a RuntimeError when `mod` cannot be loaded or when it does not export
# metadata for `fun`.
defp gen_export(mod, fun) do
  if not Code.ensure_loaded?(mod) do
    raise "cannot handle parsec(#{inspect({mod, fun})}) because #{inspect(mod)} is not available"
  end

  try do
    mod.__nimble_parsec__(fun)
  rescue
    # Only these two failures mean "metadata not exported": the function is missing
    # altogether, or it has no clause for `fun`. Anything else is a different bug
    # and must not be reported as a missing :export_metadata.
    [UndefinedFunctionError, FunctionClauseError] ->
      raise "cannot handle parsec(#{inspect({mod, fun})}) because #{inspect(mod)} " <>
              "did not set :export_metadata when defining #{fun}"
  end
end

# Generates `t` up to `n` times, prepending each result to `acc`.
# Stops early when a generation reports it was aborted.
defp gen_times(_t, 0, _mod, acc), do: acc

# The guard makes a negative count fail fast instead of recursing forever.
defp gen_times(t, n, mod, acc) when is_integer(n) and n > 0 do
  case generate(t, mod, []) do
    {gen, true} -> [gen | acc]
    {gen, false} -> gen_times(t, n - 1, mod, [gen | acc])
  end
end

# Picks a random value from the inclusive ranges (any byte when none are given),
# retrying until the value is not covered by an exclusive range.
defp gen_bin_segment(inclusive, exclusive) do
  candidates = if inclusive == [], do: [0..255], else: inclusive
  gen = candidates |> list_random() |> int_random()

  if Enum.any?(exclusive, &exclude_bin_segment?(&1, gen)) do
    gen_bin_segment(inclusive, exclusive)
  else
    gen
  end
end

defp exclude_bin_segment?({:not, min..max}, gen), do: gen >= min and gen <= max
defp exclude_bin_segment?({:not, char}, gen) when is_integer(char), do: char == gen

# Resolves a `gen_times` value: `nil` means 0 to 3, a range picks a random member,
# and an integer is used as is.
defp int_random(nil), do: Enum.random(0..3)
defp int_random(_.._ = range), do: Enum.random(range)
defp int_random(int) when is_integer(int), do: int

# Enum.random uses reservoir sampling but our lists are short, so we use length + fetch!
431 defp list_random(list) when is_list(list), 432 do: Enum.fetch!(list, :rand.uniform(length(list)) - 1) 433 434 defp weighted_random(list, weights) do 435 weighted_random(list, weights, :rand.uniform(Enum.sum(weights))) 436 end 437 438 defp weighted_random([elem | _], [weight | _], chosen) when chosen <= weight, 439 do: elem 440 441 defp weighted_random([_ | list], [weight | weights], chosen), 442 do: weighted_random(list, weights, chosen - weight) 443 444 @doc ~S""" 445 Returns an empty combinator. 446 447 An empty combinator cannot be compiled on its own. 448 """ 449 @spec empty() :: t 450 def empty() do 451 [] 452 end 453 454 @doc """ 455 Invokes an already compiled combinator with name `name` in the 456 same module. 457 458 Every parser defined via `defparsec/3` or `defparsecp/3` can be 459 used as combinators. However, the `defparsec/3` and `defparsecp/3` 460 functions also define an entry-point parsing function, as implied 461 by their names. If you want to define a combinator with the sole 462 purpose of using it in combinator, use `defcombinatorp/3` instead. 463 464 ## Use cases 465 466 `parsec/2` is useful to implement recursive definitions. 467 468 Note while `parsec/2` can be used to compose smaller combinators, 469 the preferred mechanism for doing composition is via regular functions 470 and not via `parsec/2`. Let's see a practical example. Imagine 471 that you have this module: 472 473 defmodule MyParser do 474 import NimbleParsec 475 476 date = 477 integer(4) 478 |> ignore(string("-")) 479 |> integer(2) 480 |> ignore(string("-")) 481 |> integer(2) 482 483 time = 484 integer(2) 485 |> ignore(string(":")) 486 |> integer(2) 487 |> ignore(string(":")) 488 |> integer(2) 489 |> optional(string("Z")) 490 491 defparsec :datetime, date |> ignore(string("T")) |> concat(time), debug: true 492 end 493 494 Now imagine that you want to break `date` and `time` apart 495 into helper functions, as you use them in other occasions. 
496 Generally speaking, you should **NOT** do this: 497 498 defmodule MyParser do 499 import NimbleParsec 500 501 defcombinatorp :date, 502 integer(4) 503 |> ignore(string("-")) 504 |> integer(2) 505 |> ignore(string("-")) 506 |> integer(2) 507 508 defcombinatorp :time, 509 integer(2) 510 |> ignore(string(":")) 511 |> integer(2) 512 |> ignore(string(":")) 513 |> integer(2) 514 |> optional(string("Z")) 515 516 defparsec :datetime, 517 parsec(:date) |> ignore(string("T")) |> concat(parsec(:time)) 518 end 519 520 The reason why the above is not recommended is because each 521 `parsec/2` combinator ends-up adding a stacktrace entry during 522 parsing, which affects the ability of `NimbleParsec` to optimize 523 code. If the goal is to compose combinators, you can do so 524 with modules and functions: 525 526 defmodule MyParser.Helpers do 527 import NimbleParsec 528 529 def date do 530 integer(4) 531 |> ignore(string("-")) 532 |> integer(2) 533 |> ignore(string("-")) 534 |> integer(2) 535 end 536 537 def time do 538 integer(2) 539 |> ignore(string(":")) 540 |> integer(2) 541 |> ignore(string(":")) 542 |> integer(2) 543 |> optional(string("Z")) 544 end 545 end 546 547 defmodule MyParser do 548 import NimbleParsec 549 import MyParser.Helpers 550 551 defparsec :datetime, 552 date() |> ignore(string("T")) |> concat(time()) 553 end 554 555 The implementation above will be able to compile to the most 556 efficient format as possible without forcing new stacktrace 557 entries. 558 559 The only situation where you should use `parsec/2` for composition 560 is when a large parser is used over and over again in a way 561 compilation times are high. In this sense, you can use `parsec/2` 562 to improve compilation time at the cost of runtime performance. 563 By using `parsec/2`, the tree size built at compile time will be 564 reduced although runtime performance is degraded as `parsec` 565 introduces a stacktrace entry. 
566 567 ## Remote combinators 568 569 You can also reference combinators in other modules by passing 570 a tuple with the module name and a function to `parsec/2` as follows: 571 572 defmodule RemoteCombinatorModule do 573 defcombinator :upcase_unicode, utf8_char([...long, list, of, unicode, chars...]) 574 end 575 576 defmodule LocalModule do 577 # Parsec that depends on `:upcase_A` 578 defparsec :parsec_name, 579 ... 580 |> ascii_char([?a..?Z]) 581 |> parsec({RemoteCombinatorModule, :upcase_unicode}) 582 end 583 584 Remote combinators are useful when breaking the compilation of 585 large modules apart in order to use Elixir's ability to compile 586 modules in parallel. 587 588 ## Examples 589 590 A good example of using `parsec` is with recursive parsers. 591 A limited but recursive XML parser could be written as follows: 592 593 defmodule SimpleXML do 594 import NimbleParsec 595 596 tag = ascii_string([?a..?z, ?A..?Z], min: 1) 597 text = ascii_string([not: ?<], min: 1) 598 599 opening_tag = 600 ignore(string("<")) 601 |> concat(tag) 602 |> ignore(string(">")) 603 604 closing_tag = 605 ignore(string("</")) 606 |> concat(tag) 607 |> ignore(string(">")) 608 609 defparsec :xml, 610 opening_tag 611 |> repeat(lookahead_not(string("</")) |> choice([parsec(:xml), text])) 612 |> concat(closing_tag) 613 |> wrap() 614 end 615 616 SimpleXML.xml("<foo>bar</foo>") 617 #=> {:ok, [["foo", "bar", "foo"]], "", %{}, {1, 0}, 14} 618 619 In the example above, `defparsec/3` has defined the entry-point 620 parsing function as well as a combinator which we have invoked 621 with `parsec(:xml)`. 622 623 In many cases, however, you want to define recursive combinators 624 without the entry-point parsing function. 
We can do this by 625 replacing `defparsec/3` by `defcombinatorp`: 626 627 defcombinatorp :xml, 628 opening_tag 629 |> repeat(lookahead_not(string("</")) |> choice([parsec(:xml), text])) 630 |> concat(closing_tag) 631 |> wrap() 632 633 When using `defcombinatorp`, you can no longer invoke 634 `SimpleXML.xml(xml)` as there is no associated parsing function. 635 You can only access the combinator above via `parsec/2`. 636 """ 637 @spec parsec(name :: atom) :: t 638 @spec parsec(t, name :: atom) :: t 639 @spec parsec({module, function_name :: atom}) :: t 640 @spec parsec(t, {module, function_name :: atom}) :: t 641 def parsec(combinator \\ empty(), name) 642 643 def parsec(combinator, name) when is_combinator(combinator) and is_atom(name) do 644 [{:parsec, name} | combinator] 645 end 646 647 def parsec(combinator, {module, function}) 648 when is_combinator(combinator) and is_atom(module) and is_atom(function) do 649 [{:parsec, {module, function}} | combinator] 650 end 651 652 @doc ~S""" 653 Defines a single ASCII codepoint in the given ranges. 
654 655 `ranges` is a list containing one of: 656 657 * a `min..max` range expressing supported codepoints 658 * a `codepoint` integer expressing a supported codepoint 659 * `{:not, min..max}` expressing not supported codepoints 660 * `{:not, codepoint}` expressing a not supported codepoint 661 662 ## Examples 663 664 defmodule MyParser do 665 import NimbleParsec 666 667 defparsec :digit_and_lowercase, 668 empty() 669 |> ascii_char([?0..?9]) 670 |> ascii_char([?a..?z]) 671 end 672 673 MyParser.digit_and_lowercase("1a") 674 #=> {:ok, [?1, ?a], "", %{}, {1, 0}, 2} 675 676 MyParser.digit_and_lowercase("a1") 677 #=> {:error, "expected ASCII character in the range '0' to '9', followed by ASCII character in the range 'a' to 'z'", "a1", %{}, {1, 0}, 0} 678 679 """ 680 @spec ascii_char([range]) :: t 681 @spec ascii_char(t, [range]) :: t 682 def ascii_char(combinator \\ empty(), ranges) 683 when is_combinator(combinator) and is_list(ranges) do 684 {inclusive, exclusive} = split_ranges!(ranges, "ascii_char") 685 bin_segment(combinator, inclusive, exclusive, :integer) 686 end 687 688 @doc ~S""" 689 Defines a single UTF-8 codepoint in the given ranges. 
690 691 `ranges` is a list containing one of: 692 693 * a `min..max` range expressing supported codepoints 694 * a `codepoint` integer expressing a supported codepoint 695 * `{:not, min..max}` expressing not supported codepoints 696 * `{:not, codepoint}` expressing a not supported codepoint 697 698 ## Examples 699 700 defmodule MyParser do 701 import NimbleParsec 702 703 defparsec :digit_and_utf8, 704 empty() 705 |> utf8_char([?0..?9]) 706 |> utf8_char([]) 707 end 708 709 MyParser.digit_and_utf8("1é") 710 #=> {:ok, [?1, ?é], "", %{}, {1, 0}, 2} 711 712 MyParser.digit_and_utf8("a1") 713 #=> {:error, "expected utf8 codepoint in the range '0' to '9', followed by utf8 codepoint", "a1", %{}, {1, 0}, 0} 714 715 """ 716 @spec utf8_char([range]) :: t 717 @spec utf8_char(t, [range]) :: t 718 def utf8_char(combinator \\ empty(), ranges) 719 when is_combinator(combinator) and is_list(ranges) do 720 {inclusive, exclusive} = split_ranges!(ranges, "utf8_char") 721 bin_segment(combinator, inclusive, exclusive, :utf8) 722 end 723 724 @doc ~S""" 725 Adds a label to the combinator to be used in error reports. 
726 727 ## Examples 728 729 defmodule MyParser do 730 import NimbleParsec 731 732 defparsec :digit_and_lowercase, 733 empty() 734 |> ascii_char([?0..?9]) 735 |> ascii_char([?a..?z]) 736 |> label("digit followed by lowercase letter") 737 end 738 739 MyParser.digit_and_lowercase("1a") 740 #=> {:ok, [?1, ?a], "", %{}, {1, 0}, 2} 741 742 MyParser.digit_and_lowercase("a1") 743 #=> {:error, "expected a digit followed by lowercase letter", "a1", %{}, {1, 0}, 0} 744 745 """ 746 @spec label(t, String.t()) :: t 747 @spec label(t, t, String.t()) :: t 748 def label(combinator \\ empty(), to_label, label) 749 when is_combinator(combinator) and is_combinator(to_label) and is_binary(label) do 750 non_empty!(to_label, "label") 751 [{:label, Enum.reverse(to_label), label} | combinator] 752 end 753 754 @doc ~S""" 755 Defines an integer combinator with of exact length or `min` and `max` 756 length. 757 758 If you want an integer of unknown size, use `integer(min: 1)`. 759 760 This combinator does not parse the sign and is always on base 10. 
## Examples

With exact length:

    defmodule MyParser do
      import NimbleParsec

      defparsec :two_digits_integer, integer(2)
    end

    MyParser.two_digits_integer("123")
    #=> {:ok, [12], "3", %{}, {1, 0}, 2}

    MyParser.two_digits_integer("1a3")
    #=> {:error, "expected ASCII character in the range '0' to '9', followed by ASCII character in the range '0' to '9'", "1a3", %{}, {1, 0}, 0}

With min and max:

    defmodule MyParser do
      import NimbleParsec

      defparsec :two_digits_integer, integer(min: 2, max: 4)
    end

    MyParser.two_digits_integer("123")
    #=> {:ok, [123], "", %{}, {1, 0}, 3}

    MyParser.two_digits_integer("1a3")
    #=> {:error, "expected ASCII character in the range '0' to '9', followed by ASCII character in the range '0' to '9'", "1a3", %{}, {1, 0}, 0}

If the size of the integer has a min and max close to each other, such as
from 2 to 4 or from 1 to 2, using choice may emit more efficient code:

    choice([integer(4), integer(3), integer(2)])

Note you should start from bigger to smaller.
"""
@spec integer(pos_integer | [min_and_max]) :: t
@spec integer(t, pos_integer | [min_and_max]) :: t
def integer(combinator \\ empty(), count_or_opts)
    when is_combinator(combinator) and (is_integer(count_or_opts) or is_list(count_or_opts)) do
  # Validated here with a lower bound of 1, before delegating to the shared helper.
  validate_min_and_max!(count_or_opts, 1)

  min_max_compile_runtime_chars(
    combinator,
    ascii_char([?0..?9]),
    count_or_opts,
    :__compile_integer__,
    :__runtime_integer__,
    []
  )
end

@doc ~S"""
Defines an ASCII string combinator with an exact length or `min` and `max`
length.

The `ranges` specify the allowed characters in the ASCII string.
See `ascii_char/2` for more information.

If you want a string of unknown size, use `ascii_string(ranges, min: 1)`.
If you want a literal string, use `string/2`.
824 825 ## Examples 826 827 defmodule MyParser do 828 import NimbleParsec 829 830 defparsec :two_lowercase_letters, ascii_string([?a..?z], 2) 831 end 832 833 MyParser.two_lowercase_letters("abc") 834 #=> {:ok, ["ab"], "c", %{}, {1, 0}, 2} 835 836 """ 837 @spec ascii_string([range], pos_integer | [min_and_max]) :: t 838 @spec ascii_string(t, [range], pos_integer | [min_and_max]) :: t 839 def ascii_string(combinator \\ empty(), range, count_or_opts) 840 when is_combinator(combinator) and is_list(range) and 841 (is_integer(count_or_opts) or is_list(count_or_opts)) do 842 min_max_compile_runtime_chars( 843 combinator, 844 ascii_char(range), 845 count_or_opts, 846 :__compile_string__, 847 :__runtime_string__, 848 [quote(do: integer)] 849 ) 850 end 851 852 @doc ~S""" 853 Defines an UTF8 string combinator with of exact length or `min` and `max` 854 codepoint length. 855 856 The `ranges` specify the allowed characters in the UTF8 string. 857 See `utf8_char/2` for more information. 858 859 If you want a string of unknown size, use `utf8_string(ranges, min: 1)`. 860 If you want a literal string, use `string/2`. 861 862 Note that the combinator matches on codepoints, not graphemes. Therefore 863 results may vary depending on whether the input is in `nfc` or `nfd` 864 normalized form. 
## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :two_letters, utf8_string([], 2)
    end

    MyParser.two_letters("áé")
    #=> {:ok, ["áé"], "", %{}, {1, 0}, 3}

"""
@spec utf8_string([range], pos_integer | [min_and_max]) :: t
@spec utf8_string(t, [range], pos_integer | [min_and_max]) :: t
def utf8_string(combinator \\ empty(), range, count_or_opts)
    when is_combinator(combinator) and is_list(range) and
           (is_integer(count_or_opts) or is_list(count_or_opts)) do
  min_max_compile_runtime_chars(
    combinator,
    utf8_char(range),
    count_or_opts,
    :__compile_string__,
    :__runtime_string__,
    [quote(do: utf8)]
  )
end

@doc ~S"""
Defines an end of string combinator.

The end of string does not produce a token and can be parsed multiple times.
This function is useful to avoid having to check for an empty remainder after
a successful parse.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :letter_pairs, utf8_string([], 2) |> repeat() |> eos()
    end

    MyParser.letter_pairs("hi")
    #=> {:ok, ["hi"], "", %{}, {1, 0}, 2}

    MyParser.letter_pairs("hello")
    #=> {:error, "expected end of string", "o", %{}, {1, 0}, 4}
"""
@spec eos :: t
@spec eos(t) :: t
# Guarded like the other builders so a non-combinator argument fails here
# instead of silently producing an improper list.
def eos(combinator \\ empty()) when is_combinator(combinator) do
  [:eos | combinator]
end

@doc ~S"""
Concatenates two combinators.
## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :digit_upper_lower_plus,
                concat(
                  concat(ascii_char([?0..?9]), ascii_char([?A..?Z])),
                  concat(ascii_char([?a..?z]), ascii_char([?+..?+]))
                )
    end

    MyParser.digit_upper_lower_plus("1Az+")
    #=> {:ok, [?1, ?A, ?z, ?+], "", %{}, {1, 0}, 4}

"""
@spec concat(t, t) :: t
def concat(left, right) when is_combinator(left) and is_combinator(right),
  # Combinators are stored in reverse order, so `right` goes in front of `left`.
  do: Enum.concat(right, left)

@doc """
Appends `n` copies of the `to_duplicate` combinator to `combinator`.

When `n` is `0`, `combinator` is returned unchanged.
"""
@spec duplicate(t, non_neg_integer) :: t
@spec duplicate(t, t, non_neg_integer) :: t
def duplicate(combinator \\ empty(), to_duplicate, n)

def duplicate(combinator, to_duplicate, 0)
    when is_combinator(combinator) and is_combinator(to_duplicate) do
  combinator
end

def duplicate(combinator, to_duplicate, n)
    when is_combinator(combinator) and is_combinator(to_duplicate) and is_integer(n) and n >= 1 do
  # Add one copy, then recurse until the `0` clause above ends the chain.
  duplicate(to_duplicate ++ combinator, to_duplicate, n - 1)
end

@doc """
Wraps the result of the given combinator in a tuple, with the result
as first element and the `byte_offset` as second element.

`byte_offset` is a non-negative integer.
"""
@spec byte_offset(t) :: t
@spec byte_offset(t, t) :: t
def byte_offset(combinator \\ empty(), to_wrap)
    when is_combinator(combinator) and is_combinator(to_wrap) do
  call = {__MODULE__, :__byte_offset__, []}
  quoted_post_traverse(combinator, to_wrap, call)
end

@doc """
Puts the result of the given combinator as the first element
of a tuple with the `line` as second element.

`line` is a tuple where the first element is the current line
and the second element is the byte offset immediately after
the newline.
981 """ 982 @spec line(t) :: t 983 @spec line(t, t) :: t 984 def line(combinator \\ empty(), to_wrap) 985 when is_combinator(combinator) and is_combinator(to_wrap) do 986 quoted_post_traverse(combinator, to_wrap, {__MODULE__, :__line__, []}) 987 end 988 989 @doc ~S""" 990 Traverses the combinator results with the remote or local function `call`. 991 992 `call` is either a `{module, function, args}` representing 993 a remote call, a `{function, args}` representing a local call 994 or an atom `function` representing `{function, []}`. 995 996 The function given in `call` will receive 5 additional arguments. 997 The rest of the parsed binary, the parser results to be post_traversed, 998 the parser context, the current line and the current offset will 999 be prepended to the given `args`. The `args` will be injected at 1000 the compile site and therefore must be escapable via `Macro.escape/1`. 1001 1002 The line and offset will represent the location after the combinators. 1003 To retrieve the position before the combinators, use `pre_traverse/3`. 1004 1005 The `call` must return a tuple `{rest, acc, context}` with list of 1006 results to be added to the accumulator as first argument and a context 1007 as second argument. It may also return `{:error, reason}` to stop 1008 processing. Notice the received results are in reverse order and 1009 must be returned in reverse order too. 1010 1011 The number of elements returned does not need to be 1012 the same as the number of elements given. 1013 1014 This is a low-level function for changing the parsed result. 1015 On top of this function, other functions are built, such as 1016 `map/3` if you want to map over each individual element and 1017 not worry about ordering, `reduce/3` to reduce all elements 1018 into a single one, `replace/3` if you want to replace the 1019 parsed result by a single value and `ignore/2` if you want to 1020 ignore the parsed result. 
1021 1022 ## Examples 1023 1024 defmodule MyParser do 1025 import NimbleParsec 1026 1027 defparsec :letters_to_chars, 1028 ascii_char([?a..?z]) 1029 |> ascii_char([?a..?z]) 1030 |> ascii_char([?a..?z]) 1031 |> post_traverse({:join_and_wrap, ["-"]}) 1032 1033 defp join_and_wrap(rest, args, context, _line, _offset, joiner) do 1034 {rest, args |> Enum.join(joiner) |> List.wrap(), context} 1035 end 1036 end 1037 1038 MyParser.letters_to_chars("abc") 1039 #=> {:ok, ["99-98-97"], "", %{}, {1, 0}, 3} 1040 1041 """ 1042 @spec post_traverse(t, call) :: t 1043 @spec post_traverse(t, t, call) :: t 1044 def post_traverse(combinator \\ empty(), to_post_traverse, call) 1045 when is_combinator(combinator) and is_combinator(to_post_traverse) do 1046 compile_call!([], call, "post_traverse") 1047 quoted_post_traverse(combinator, to_post_traverse, {__MODULE__, :__post_traverse__, [call]}) 1048 end 1049 1050 @doc """ 1051 The same as `post_traverse/3` but receives the line and offset 1052 from before the wrapped combinators. 1053 1054 `post_traverse/3` should be preferred as it keeps less stack 1055 information. Use `pre_traverse/3` only if you have to access 1056 the line and offset from before the given combinators. 1057 """ 1058 @spec pre_traverse(t, call) :: t 1059 @spec pre_traverse(t, t, call) :: t 1060 def pre_traverse(combinator \\ empty(), to_pre_traverse, call) 1061 when is_combinator(combinator) and is_combinator(to_pre_traverse) do 1062 compile_call!([], call, "pre_traverse") 1063 quoted_pre_traverse(combinator, to_pre_traverse, {__MODULE__, :__pre_traverse__, [call]}) 1064 end 1065 1066 @doc ~S""" 1067 Checks if a combinator is ahead. 1068 1069 If it succeeds, it continues as usual, otherwise it aborts the 1070 closest `choice/2`, `repeat/2`, etc. If there is no closest 1071 operation to abort, then it errors. 1072 1073 Note a lookahead never changes the accumulated output nor the 1074 context. 

## Examples

For example, imagine you want to parse a language that has the
keywords "if" and "while" and identifiers made of any letters or
numbers, where keywords and identifiers can be separated by a
single white space:

    defmodule IfWhileLang do
      import NimbleParsec

      keyword =
        choice([
          string("if") |> replace(:if),
          string("while") |> replace(:while)
        ])

      identifier =
        ascii_string([?a..?z, ?A..?Z, ?0..?9], min: 1)

      defparsec :expr, repeat(choice([keyword, identifier]) |> optional(string(" ")))
    end

The issue with the implementation above is that the following
will parse:

    IfWhileLang.expr("iffy")
    {:ok, [:if, "fy"], "", %{}, {1, 0}, 4}

However, "iffy" should be treated as a full identifier. We could
solve this by inverting the order of `keyword` and `identifier`
in `:expr` but that means "if" itself will be considered an identifier
and not a keyword. To solve this, we need lookaheads.

One option is to check that after the keyword we either have a
white space OR the end of the string:

    keyword =
      choice([
        string("if") |> replace(:if),
        string("while") |> replace(:while)
      ])
      |> lookahead(choice([string(" "), eos()]))

However, in this case, a negative lookahead may be clearer,
and we can assert that we don't have any identifier character after
the keyword:

    keyword =
      choice([
        string("if") |> replace(:if),
        string("while") |> replace(:while)
      ])
      |> lookahead_not(ascii_char([?a..?z, ?A..?Z, ?0..?9]))

Now we get the desired result back:

    IfWhileLang.expr("iffy")
    #=> {:ok, ["iffy"], "", %{}, {1, 0}, 4}

    IfWhileLang.expr("if fy")
    #=> {:ok, [:if, " ", "fy"], "", %{}, {1, 0}, 5}

"""
@spec lookahead(t) :: t
@spec lookahead(t, t) :: t
def lookahead(combinator \\ empty(), to_lookahead)
    when is_combinator(combinator) and is_combinator(to_lookahead) do
  # Combinators are accumulated in reverse, so the inner one is
  # reversed before being stored in the node.
  ahead = Enum.reverse(to_lookahead)
  [{:lookahead, ahead, :positive} | combinator]
end

@doc ~S"""
Checks if a combinator is not ahead.

If it succeeds, it aborts the closest `choice/2`, `repeat/2`, etc.
Otherwise it continues as usual. If there is no closest operation
to abort, then it errors.

Note a lookahead never changes the accumulated output nor the
context.

For an example, see `lookahead/2`.
"""
@spec lookahead_not(t) :: t
@spec lookahead_not(t, t) :: t
def lookahead_not(combinator \\ empty(), to_lookahead)
    when is_combinator(combinator) and is_combinator(to_lookahead) do
  # Same node as `lookahead/2`, flagged as a negative check.
  ahead = Enum.reverse(to_lookahead)
  [{:lookahead, ahead, :negative} | combinator]
end

@doc """
Invokes `call` to emit the AST that post traverses the `to_post_traverse`
combinator results.

This is similar to `post_traverse/3`.
In `post_traverse/3`, `call` is
invoked to process the combinator results. In here, it is invoked to
emit AST that in its turn will process the combinator results.
The invoked function must return the same types as `post_traverse/3`.

`call` is a `{module, function, args}` and it will receive 5
additional arguments. The AST representation of the rest of the
parsed binary, the parser results, context, line and offset will
be prepended to `args`. `call` is invoked at compile time and is
useful in combinators that avoid injecting runtime dependencies.

The line and offset will represent the location after the combinators.
To retrieve the position before the combinators, use `quoted_pre_traverse/3`.

This function must be used only when you want to emit code that
has no runtime dependencies in other modules. In most cases,
using `post_traverse/3` is better, since it doesn't work on ASTs
and instead works at runtime.
"""
@spec quoted_post_traverse(t, mfargs) :: t
@spec quoted_post_traverse(t, t, mfargs) :: t
def quoted_post_traverse(combinator \\ empty(), to_post_traverse, {_, _, _} = call)
    when is_combinator(combinator) and is_combinator(to_post_traverse) do
  # Builds the `:traverse` node directly; this is exactly the node
  # `quoted_traverse/4` produces for the `:post` kind.
  inner = Enum.reverse(to_post_traverse)
  [{:traverse, inner, :post, [call]} | combinator]
end

@doc """
The same as `quoted_post_traverse/3` but receives the line and offset
from before the wrapped combinators.

`quoted_post_traverse/3` should be preferred as it keeps less stack
information. Use `quoted_pre_traverse/3` only if you have to access
the line and offset from before the given combinators.
"""
@spec quoted_pre_traverse(t, mfargs) :: t
@spec quoted_pre_traverse(t, t, mfargs) :: t
def quoted_pre_traverse(combinator \\ empty(), to_pre_traverse, {_, _, _} = call)
    when is_combinator(combinator) and is_combinator(to_pre_traverse) do
  # Builds the `:traverse` node directly; this is exactly the node
  # `quoted_traverse/4` produces for the `:pre` kind.
  inner = Enum.reverse(to_pre_traverse)
  [{:traverse, inner, :pre, [call]} | combinator]
end

@doc ~S"""
Maps over the combinator results with the remote or local function in `call`.

`call` is either a `{module, function, args}` representing
a remote call, a `{function, args}` representing a local call
or an atom `function` representing `{function, []}`.

The `call` will be invoked individually for each parser result.
Each result will be prepended to the given `args`. The `args` will
be injected at the compile site and therefore must be escapable
via `Macro.escape/1`.

See `post_traverse/3` for a low level version of this function.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :letters_to_string_chars,
                ascii_char([?a..?z])
                |> ascii_char([?a..?z])
                |> ascii_char([?a..?z])
                |> map({Integer, :to_string, []})
    end

    MyParser.letters_to_string_chars("abc")
    #=> {:ok, ["97", "98", "99"], "", %{}, {1, 0}, 3}
"""
@spec map(t, call) :: t
@spec map(t, t, call) :: t
def map(combinator \\ empty(), to_map, call)
    when is_combinator(combinator) and is_combinator(to_map) do
  # The variable stands for each element inside the generated
  # `Enum.map/2` function; the compiled call receives it as first argument.
  element = Macro.var(:var, __MODULE__)
  mapper = compile_call!([element], call, "map")
  quoted_post_traverse(combinator, to_map, {__MODULE__, :__map__, [element, mapper]})
end

@doc ~S"""
Reduces over the combinator results with the remote or local function in `call`.

`call` is either a `{module, function, args}` representing
a remote call, a `{function, args}` representing a local call
or an atom `function` representing `{function, []}`.

The parser results to be reduced will be prepended to the
given `args`. The `args` will be injected at the compile site
and therefore must be escapable via `Macro.escape/1`.

See `post_traverse/3` for a low level version of this function.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :letters_to_reduced_chars,
                ascii_char([?a..?z])
                |> ascii_char([?a..?z])
                |> ascii_char([?a..?z])
                |> reduce({Enum, :join, ["-"]})
    end

    MyParser.letters_to_reduced_chars("abc")
    #=> {:ok, ["97-98-99"], "", %{}, {1, 0}, 3}
"""
@spec reduce(t, call) :: t
@spec reduce(t, t, call) :: t
def reduce(combinator \\ empty(), to_reduce, call)
    when is_combinator(combinator) and is_combinator(to_reduce) do
  # Compiled here only for validation: a malformed `call` raises
  # ArgumentError right away. The compiled AST itself is discarded.
  _ = compile_call!([], call, "reduce")

  reducer = {__MODULE__, :__reduce__, [call]}
  quoted_post_traverse(combinator, to_reduce, reducer)
end

@doc """
Wraps the results of the given combinator in `to_wrap` in a list.
"""
@spec wrap(t) :: t
@spec wrap(t, t) :: t
def wrap(combinator \\ empty(), to_wrap)
    when is_combinator(combinator) and is_combinator(to_wrap) do
  wrapper = {__MODULE__, :__wrap__, []}
  quoted_post_traverse(combinator, to_wrap, wrapper)
end

@doc """
Tags the result of the given combinator in `to_tag` in a tuple with
`tag` as first element.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :integer, integer(min: 1) |> tag(:integer)
    end

    MyParser.integer("1234")
    #=> {:ok, [integer: [1234]], "", %{}, {1, 0}, 4}

Notice, however, that the integer result is wrapped in a list, because
the parser is expected to emit multiple tokens. When you are sure that
only a single token is emitted, you should use `unwrap_and_tag/3`.
"""
@spec tag(t, term) :: t
@spec tag(t, t, term) :: t
def tag(combinator \\ empty(), to_tag, tag)
    when is_combinator(combinator) and is_combinator(to_tag) do
  # The tag is injected into generated code, hence it is escaped first.
  escaped_tag = Macro.escape(tag)
  quoted_post_traverse(combinator, to_tag, {__MODULE__, :__tag__, [escaped_tag]})
end

@doc """
Unwraps and tags the result of the given combinator in `to_tag` in a tuple with
`tag` as first element.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :integer, integer(min: 1) |> unwrap_and_tag(:integer)
    end

    MyParser.integer("1234")
    #=> {:ok, [integer: 1234], "", %{}, {1, 0}, 4}


In case the combinator emits more than one token, an error will be raised.
See `tag/3` for more information.
"""
@spec unwrap_and_tag(t, term) :: t
@spec unwrap_and_tag(t, t, term) :: t
def unwrap_and_tag(combinator \\ empty(), to_tag, tag)
    when is_combinator(combinator) and is_combinator(to_tag) do
  # The tag is injected into generated code, hence it is escaped first.
  escaped_tag = Macro.escape(tag)
  tagger = {__MODULE__, :__unwrap_and_tag__, [escaped_tag]}
  quoted_post_traverse(combinator, to_tag, tagger)
end

@doc """
Inspects the parser state around the combinator given in `to_debug`.

It prints the rest of the binary, the accumulated results, the context,
the line and the offset to the standard output, and leaves the results
untouched.
"""
@spec debug(t) :: t
@spec debug(t, t) :: t
def debug(combinator \\ empty(), to_debug)
    when is_combinator(combinator) and is_combinator(to_debug) do
  printer = {__MODULE__, :__debug__, []}
  quoted_pre_traverse(combinator, to_debug, printer)
end

@doc ~S"""
Defines a string binary value.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :string_t, string("T")
    end

    MyParser.string_t("T")
    #=> {:ok, ["T"], "", %{}, {1, 0}, 1}

    MyParser.string_t("not T")
    #=> {:error, "expected a string \"T\"", "not T", %{}, {1, 0}, 0}

"""
@spec string(binary) :: t
@spec string(t, binary) :: t
def string(combinator \\ empty(), binary)
    when is_combinator(combinator) and is_binary(binary) do
  [{:string, binary} | combinator]
end

@doc """
Ignores the output of combinator given in `to_ignore`.

If `to_ignore` is the empty combinator there is nothing to ignore
and `combinator` is returned unchanged.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :ignorable, string("T") |> ignore() |> integer(2)
    end

    MyParser.ignorable("T12")
    #=> {:ok, [12], "", %{}, {1, 0}, 3}

"""
@spec ignore(t) :: t
@spec ignore(t, t) :: t
def ignore(combinator \\ empty(), to_ignore)
    when is_combinator(combinator) and is_combinator(to_ignore) do
  if to_ignore == empty() do
    # Nothing to ignore. Return the combinator built so far, not
    # `to_ignore`: returning the latter would drop everything that
    # precedes this call in a pipeline.
    combinator
  else
    # An empty list as constant means the results of `to_ignore`
    # are replaced by nothing.
    quoted_constant_traverse(combinator, to_ignore, {__MODULE__, :__constant__, [[]]})
  end
end

@doc """
Replaces the output of combinator given in `to_replace` by a single value.

The `value` will be injected at the compile site
and therefore must be escapable via `Macro.escape/1`.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :replaceable, string("T") |> replace("OTHER") |> integer(2)
    end

    MyParser.replaceable("T12")
    #=> {:ok, ["OTHER", 12], "", %{}, {1, 0}, 3}

"""
@spec replace(t, term) :: t
@spec replace(t, t, term) :: t
def replace(combinator \\ empty(), to_replace, value)
    when is_combinator(combinator) and is_combinator(to_replace) do
  # The single-element list is what replaces the results of `to_replace`.
  escaped = Macro.escape(value)
  constant = {__MODULE__, :__constant__, [[escaped]]}
  quoted_constant_traverse(combinator, to_replace, constant)
end

@doc """
Allow the combinator given on `to_repeat` to appear zero or more times.

Beware! Since `repeat/2` allows zero entries, it cannot be used inside
`choice/2`, because it will always succeed and may lead to unused function
warnings since any further choice won't ever be attempted. For example,
because `repeat/2` always succeeds, the `string/2` combinator below it
won't ever run:

    choice([
      repeat(ascii_char([?a..?z])),
      string("OK")
    ])

Instead of `repeat/2`, you may want to use `times/3` with the flags `:min`
and `:max`.

Also beware! If you attempt to repeat a combinator that can match nothing,
like `optional/2`, `repeat/2` will not terminate. For example, consider
this combinator:

     repeat(optional(utf8_char([?a])))

This combinator will never terminate because `repeat/2` chooses the empty
option of `optional/2` every time. Since the goal of the parser above is
to parse 0 or more `?a` characters, it can be represented by
`repeat(utf8_char([?a]))`, because `repeat/2` allows 0 or more matches.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :repeat_lower, repeat(ascii_char([?a..?z]))
    end

    MyParser.repeat_lower("abcd")
    #=> {:ok, [?a, ?b, ?c, ?d], "", %{}, {1, 0}, 4}

    MyParser.repeat_lower("1234")
    #=> {:ok, [], "1234", %{}, {1, 0}, 0}

"""
@spec repeat(t) :: t
@spec repeat(t, t) :: t
@spec repeat(t, opts) :: t
@spec repeat(t, t, opts) :: t
def repeat(combinator \\ empty(), to_repeat, opts \\ [])
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  non_empty!(to_repeat, "repeat")

  # `__cont_context__/4` always answers `{:cont, context}`, so the loop
  # only stops when `to_repeat` itself stops matching.
  always_continue = {__MODULE__, :__cont_context__, []}
  quoted_repeat_while(combinator, to_repeat, always_continue, opts)
end

@doc """
Marks that the given combinator should appear eventually.

Any other data before the combinator appears is discarded.
If the combinator never appears, then it is an error.

**Note:** this can be potentially a very expensive operation
as it executes the given combinator byte by byte until finding
an eventual match or ultimately failing.
For example, if you
are looking for an integer, it is preferable to discard
everything that is not an integer

    ignore(ascii_string([not: ?0..?9]))

rather than eventually look for an integer

    eventually(ascii_string([?0..?9]))

## Examples

    defmodule MyParser do
      import NimbleParsec

      hour = integer(min: 1, max: 2)
      defparsec :extract_hour, eventually(hour)
    end

    MyParser.extract_hour("let's meet at 12?")
    #=> {:ok, [12], "?", %{}, {1, 0}, 16}

"""
@spec eventually(t) :: t
@spec eventually(t, t) :: t
def eventually(combinator \\ empty(), eventually)
    when is_combinator(combinator) and is_combinator(eventually) do
  non_empty!(eventually, "eventually")

  # Combinators are accumulated in reverse, so the inner one is
  # reversed before being stored in the node.
  target = Enum.reverse(eventually)
  [{:eventually, target} | combinator]
end

@doc ~S"""
Repeats while the given remote or local function `while` returns
`{:cont, context}`.

If the combinator `to_repeat` stops matching, then the whole repeat
loop stops successfully, hence it is important to assert the terminated
value after repeating.

In case repetition should stop, `while` must return `{:halt, context}`.

`while` is either a `{module, function, args}` representing
a remote call, a `{function, args}` representing a local call
or an atom `function` representing `{function, []}`.

The function given in `while` will receive 4 additional arguments.
The `rest` of the binary to be parsed, the parser context, the
current line and the current offset will be prepended to the
given `args`. The `args` will be injected at the compile site
and therefore must be escapable via `Macro.escape/1`.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :string_with_quotes,
                ascii_char([?"])
                |> repeat_while(
                  choice([
                    ~S(\") |> string() |> replace(?"),
                    utf8_char([])
                  ]),
                  {:not_quote, []}
                )
                |> ascii_char([?"])
                |> reduce({List, :to_string, []})

      defp not_quote(<<?", _::binary>>, context, _, _), do: {:halt, context}
      defp not_quote(_, context, _, _), do: {:cont, context}
    end

    MyParser.string_with_quotes(~S("string with quotes \" inside"))
    #=> {:ok, ["\"string with quotes \" inside\""], "", %{}, {1, 0}, 30}

Note you can use `lookahead/2` and `lookahead_not/2` with
`repeat/2` (instead of `repeat_while/3`) to write a combinator
that repeats while a combinator matches (or does not match).
For example, the same combinator above could be written as:

    defmodule MyParser do
      import NimbleParsec

      defparsec :string_with_quotes,
                ascii_char([?"])
                |> repeat(
                  lookahead_not(ascii_char([?"]))
                  |> choice([
                    ~S(\") |> string() |> replace(?"),
                    utf8_char([])
                  ])
                )
                |> ascii_char([?"])
                |> reduce({List, :to_string, []})
    end

    MyParser.string_with_quotes(~S("string with quotes \" inside"))
    #=> {:ok, ["\"string with quotes \" inside\""], "", %{}, {1, 0}, 30}

However, `repeat_while` is still useful when the condition to
repeat comes from the context passed around.
"""
@spec repeat_while(t, call) :: t
@spec repeat_while(t, t, call) :: t
@spec repeat_while(t, t, call, opts) :: t
def repeat_while(combinator \\ empty(), to_repeat, while, opts \\ [])
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  non_empty!(to_repeat, "repeat_while")

  # Compiled here only for validation: a malformed `while` raises
  # ArgumentError right away. The compiled AST itself is discarded.
  _ = compile_call!([], while, "repeat_while")

  condition = {__MODULE__, :__repeat_while__, [while]}
  quoted_repeat_while(combinator, to_repeat, condition, opts)
end

@doc """
Invokes `while` to emit the AST that will repeat `to_repeat`
while the AST code returns `{:cont, context}`.

In case repetition should stop, `while` must return `{:halt, context}`.

`while` is a `{module, function, args}` and it will receive 4
additional arguments. The AST representations of the binary to be
parsed, context, line and offset will be prepended to `args`. `while`
is invoked at compile time and is useful in combinators that avoid
injecting runtime dependencies.
"""
@spec quoted_repeat_while(t, mfargs) :: t
@spec quoted_repeat_while(t, t, mfargs) :: t
@spec quoted_repeat_while(t, t, mfargs, opts) :: t
def quoted_repeat_while(combinator \\ empty(), to_repeat, {_, _, _} = while, opts \\ [])
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  non_empty!(to_repeat, "quoted_repeat_while")

  # Combinators are accumulated in reverse, so the repeated one is
  # reversed before being stored in the node.
  body = Enum.reverse(to_repeat)
  gen_times = opts[:gen_times]
  [{:repeat, body, while, gen_times} | combinator]
end

@doc """
Allow the combinator given on `to_repeat` to appear at least, at most
or exactly a given amount of times.

## Examples

    defmodule MyParser do
      import NimbleParsec

      defparsec :minimum_lower, times(ascii_char([?a..?z]), min: 2)
    end

    MyParser.minimum_lower("abcd")
    #=> {:ok, [?a, ?b, ?c, ?d], "", %{}, {1, 0}, 4}

    MyParser.minimum_lower("ab12")
    #=> {:ok, [?a, ?b], "12", %{}, {1, 0}, 2}

    MyParser.minimum_lower("a123")
    #=> {:ok, [], "a123", %{}, {1, 0}, 0}

"""
@spec times(t, pos_integer | [min_and_max]) :: t
@spec times(t, t, pos_integer | [min_and_max]) :: t
def times(combinator \\ empty(), to_repeat, count_or_min_max)

def times(combinator, to_repeat, n)
    when is_combinator(combinator) and is_combinator(to_repeat) and is_integer(n) and n >= 1 do
  # Exact count: the combinator is simply duplicated `n` times.
  non_empty!(to_repeat, "times")
  duplicate(combinator, to_repeat, n)
end

def times(combinator, to_repeat, opts)
    when is_combinator(combinator) and is_combinator(to_repeat) and is_list(opts) do
  {min, max} = validate_min_and_max!(opts)
  non_empty!(to_repeat, "times")

  # The first `min` occurrences are mandatory and are emitted by duplication.
  required = if min > 0, do: duplicate(combinator, to_repeat, min), else: combinator

  reversed = Enum.reverse(to_repeat)

  # The remaining occurrences are optional: bounded by `max - min` when
  # `:max` is given, unbounded otherwise.
  optional_part =
    if max do
      {:times, reversed, max - min}
    else
      {:repeat, reversed, {__MODULE__, :__cont_context__, []}, opts[:gen_times]}
    end

  [optional_part | required]
end

@doc """
Chooses one of the given combinators.

Expects at least two choices.

## Beware! Char combinators

Note both `utf8_char/2` and `ascii_char/2` allow multiple ranges to
be given. Therefore, instead of this:

    choice([
      ascii_char([?a..?z]),
      ascii_char([?A..?Z]),
    ])

One should simply prefer:

    ascii_char([?a..?z, ?A..?Z])

As the latter is compiled more efficiently by `NimbleParsec`.

## Beware! Always successful combinators

If a combinator that always succeeds is given as a choice, that choice
will always succeed which may lead to unused function warnings since
any further choice won't ever be attempted. For example, because `repeat/2`
always succeeds, the `string/2` combinator below it won't ever run:

    choice([
      repeat(ascii_char([?0..?9])),
      string("OK")
    ])

Instead of `repeat/2`, you may want to use `times/3` with the flags `:min`
and `:max`.
"""
@spec choice(nonempty_list(t)) :: t
@spec choice(t, nonempty_list(t)) :: t
@spec choice(t, nonempty_list(t), opts) :: t
def choice(combinator \\ empty(), [_, _ | _] = choices, opts \\ [])
    when is_combinator(combinator) do
  # Each alternative is accumulated in reverse, so it is reversed
  # before being stored in the node.
  alternatives = for alternative <- choices, do: Enum.reverse(alternative)
  weights = opts[:gen_weights]
  mismatch = weights && length(weights) != length(alternatives)

  if mismatch do
    raise ArgumentError, ":gen_weights must be a list of the same size as choices"
  end

  [{:choice, alternatives, weights} | combinator]
end

@doc """
Marks the given combinator as `optional`.

It is equivalent to `choice([optional, empty()])`.
"""
@spec optional(t) :: t
@spec optional(t, t) :: t
def optional(combinator \\ empty(), optional) do
  choice(combinator, [optional, empty()])
end

## Helpers

# Validates the `:min`/`:max` options and returns `{min, max}`, where
# `min` defaults to 0 and `max` is left as given (nil when absent).
# An integer is treated as `[min: integer]`.
# Raises ArgumentError when neither option is given, when a given value is
# not an integer above its lower bound, or when `:max` is not strictly
# greater than `:min`.
defp validate_min_and_max!(count_or_opts, required_min \\ 0)

defp validate_min_and_max!(count, required_min)
     when is_integer(count) do
  validate_min_and_max!([min: count], required_min)
end

defp validate_min_and_max!(opts, required_min) do
  min = opts[:min]
  max = opts[:max]

  cond do
    min && max ->
      validate_min_or_max!(:min, min, required_min)
      validate_min_or_max!(:max, max, 1)

      # `and` short-circuits: the raise only runs when max <= min.
      max <= min and
        raise ArgumentError,
              "expected :max to be strictly greater than :min, got: #{min} and #{max}"

    min ->
      validate_min_or_max!(:min, min, required_min)

    max ->
      validate_min_or_max!(:max, max, 1)

    true ->
      raise ArgumentError, "expected :min or :max to be given"
  end

  {min || 0, max}
end

# Raises ArgumentError unless `value` is an integer greater than or
# equal to `min`. `kind` is only used in the error message.
defp validate_min_or_max!(kind, value, min) do
  unless is_integer(value) and value >= min do
    raise ArgumentError,
          "expected #{kind} to be an integer greater than or equal to #{min}, " <>
            "got: #{inspect(value)}"
  end
end

# Splits `ranges` in two lists: entries for which `split_range!/2` returns
# true (integers and ranges) and entries for which it returns false
# (`{:not, _}` entries). Raises on any other entry.
defp split_ranges!(ranges, context) do
  Enum.split_with(ranges, &split_range!(&1, context))
end

defp split_range!(x, _context) when is_integer(x), do: true
defp split_range!(_.._, _context), do: true
defp split_range!({:not, x}, _context) when is_integer(x), do: false
defp split_range!({:not, _.._}, _context), do: false

defp split_range!(range, context) do
  raise ArgumentError, "unknown range #{inspect(range)} given to #{context}"
end

# Builds the quoted call for `call`, which may be `{module, function, args}`
# (remote call), `{function, args}` (local call) or a bare atom (local call
# without extra arguments). The `extra` arguments come first, followed by
# the escaped `args`. Any other shape raises ArgumentError mentioning `context`.
defp compile_call!(extra, {module, function, args}, _context)
     when is_atom(module) and is_atom(function) and is_list(args) do
  quote do
    unquote(module).unquote(function)(
      unquote_splicing(extra),
      unquote_splicing(Macro.escape(args))
    )
  end
end

defp compile_call!(extra, {function, args}, _context)
     when is_atom(function) and is_list(args) do
  quote do
    unquote(function)(unquote_splicing(extra), unquote_splicing(Macro.escape(args)))
  end
end

defp compile_call!(extra, function, _context) when is_atom(function) do
  quote do
    unquote(function)(unquote_splicing(extra))
  end
end

defp compile_call!(_args, unknown, context) do
  raise ArgumentError, "unknown call given to #{context}, got: #{inspect(unknown)}"
end

# Raises ArgumentError when the combinator is empty or when any of its
# elements is itself a list (a list of combinators was given by mistake).
# `action` is only used in the error message.
defp non_empty!([], action) do
  raise ArgumentError, "cannot call #{action} on empty combinator"
end

defp non_empty!(combinator, action) do
  if Enum.any?(combinator, &is_list/1) do
    raise ArgumentError,
          "invalid combinator given to #{action}, got a list of combinators instead"
  end
end

## Inner combinators

# Prepends a `:constant` traverse node to `combinator`. When `to_traverse`
# is exactly one `:constant` traverse node, no new nesting is created:
# `call` is added to that node's list of calls instead.
defp quoted_constant_traverse(combinator, to_traverse, call) do
  case to_traverse do
    [{:traverse, inner_traverse, :constant, inner_call}] ->
      [{:traverse, inner_traverse, :constant, [call | inner_call]} | combinator]

    _ ->
      [{:traverse, Enum.reverse(to_traverse), :constant, [call]} | combinator]
  end
end

# Prepends a traverse node of the given kind (`:pre` or `:post` in this
# file) wrapping the reversed `to_traverse`.
defp quoted_traverse(combinator, to_traverse, pre_or_pos, call) do
  [{:traverse, Enum.reverse(to_traverse), pre_or_pos, [call]} | combinator]
end

# Prepends a `:bin_segment` node to `combinator`.
defp bin_segment(combinator, inclusive, exclusive, modifier) do
  [{:bin_segment, inclusive, exclusive, modifier} | combinator]
end

## Traverse callbacks

# The functions below return quoted expressions (AST) built from their
# arguments, not runtime values.

@doc false
def __pre_traverse__(rest, acc, context, line, offset, call) do
  compile_call!([rest, acc, context, line, offset], call, "pre_traverse")
end

@doc false
def __post_traverse__(rest, acc, context, line, offset, call) do
  compile_call!([rest, acc, context, line, offset], call, "post_traverse")
end

@doc false
def __lookahead__(rest, _acc, context, line, offset, call) do
  compile_call!([rest, context, line, offset], call, "lookahead")
end

# `{:{}, [], [a, b, c]}` is the AST of the three-element tuple `{a, b, c}`.
@doc false
def __wrap__(rest, acc, context, _line, _offset) do
  {:{}, [], [rest, [reverse_now_or_later(acc)], context]}
end

@doc false
def __tag__(rest, acc, context, _line, _offset, tag) do
  {:{}, [], [rest, [{tag, reverse_now_or_later(acc)}], context]}
end

# When `acc` is a list, the number of tokens is checked right here and a
# mismatch raises immediately. Otherwise the check is emitted as code.
@doc false
def __unwrap_and_tag__(rest, acc, context, _line, _offset, tag) when is_list(acc) do
  case acc do
    [one] -> {:{}, [], [rest, [{tag, one}], context]}
    many -> raise "unwrap_and_tag/3 expected a single token, got: #{inspect(many)}"
  end
end

def __unwrap_and_tag__(rest, acc, context, _line, _offset, tag) do
  quoted =
    quote do
      case :lists.reverse(unquote(acc)) do
        [one] -> one
        many -> raise "unwrap_and_tag/3 expected a single token, got: #{inspect(many)}"
      end
    end

  {:{}, [], [rest, [{tag, quoted}], context]}
end

# Emits code that prints the given values and returns
# `{rest, acc, context}` unchanged.
@doc false
def __debug__(rest, acc, context, line, offset) do
  quote bind_quoted: [rest: rest, acc: acc, context: context, line: line, offset: offset] do
    IO.puts("""
    == DEBUG ==
    Bin: #{inspect(rest)}
    Acc: #{inspect(:lists.reverse(acc))}
    Ctx: #{inspect(context)}
    Lin: #{inspect(line)}
    Off: #{inspect(offset)}
    """)

    {rest, acc, context}
  end
end

# Discards the accumulated results and puts `constant` in their place.
@doc false
def __constant__(rest, _acc, context, _line, _offset, constant) do
  {:{}, [], [rest, constant, context]}
end

# Pairs the (reversed) accumulated results with the line.
@doc false
def __line__(rest, acc, context, line, _offset) do
  {:{}, [], [rest, [{reverse_now_or_later(acc), line}], context]}
end

# Pairs the (reversed) accumulated results with the offset.
@doc false
def __byte_offset__(rest, acc, context, _line, offset) do
  {:{}, [], [rest, [{reverse_now_or_later(acc), offset}], context]}
end

# Emits an `Enum.map/2` over the accumulator, where `var` is the function
# argument and `call` is the already compiled function body.
@doc false
def __map__(rest, acc, context, _line, _offset, var, call) do
  ast =
    quote do
      Enum.map(unquote(acc), fn unquote(var) -> unquote(call) end)
    end

  {:{}, [], [rest, ast, context]}
end

# Emits a single call to `call` that receives the reversed accumulator as
# first argument; its result becomes the only accumulated element.
@doc false
def __reduce__(rest, acc, context, _line, _offset, call) do
  {:{}, [], [rest, [compile_call!([reverse_now_or_later(acc)], call, "reduce")], context]}
end

## Repeat callbacks

@doc false
def __cont_context__(_rest, context, _line, _offset) do
  {:cont, context}
end

@doc false
def __repeat_while__(quoted, context, line, offset, call) do
  compile_call!([quoted, context, line, offset], call, "repeat_while")
end

## Chars callbacks

# Builds a chars combinator post-traversed by either the `compile` or the
# `runtime` callback of this module.
# With an integer count, `to_repeat` is duplicated `count` times and
# traversed by `compile`, which receives `count` followed by `args`.
# With options, the first `min` occurrences are built as in the integer
# case, the remaining ones are added via `times/3` (when `:max` is given)
# or `repeat/2`, and the whole is traversed by `runtime`, which receives
# `min` and `max` followed by `args`.
defp min_max_compile_runtime_chars(combinator, to_repeat, count, compile, _runtime, args)
     when is_integer(count) and count >= 0 do
  chars = duplicate(to_repeat, count)
  quoted_post_traverse(combinator, chars, {__MODULE__, compile, [count | args]})
end

defp min_max_compile_runtime_chars(combinator, to_repeat, opts, compile, runtime, args)
     when is_list(opts) do
  {min, max} = validate_min_and_max!(opts)

  chars =
    if min > 0 do
      min_max_compile_runtime_chars(empty(), to_repeat, min, compile, runtime, args)
    else
      empty()
    end

  chars =
    if max do
      times(chars, to_repeat, max: max - min)
    else
      repeat(chars, to_repeat)
    end

  quoted_post_traverse(combinator, chars, {__MODULE__, runtime, [min, max | args]})
end

@doc false
def __runtime_string__(rest, acc, context, _line, _offset, _min, _max, _type) do
  ast = quote(do: List.to_string(unquote(reverse_now_or_later(acc))))
  {:{}, [], [rest, [ast], context]}
end

# When `acc` is a list, a binary construction AST (`<<e1::type, ...>>`) is
# emitted directly from its reversed entries. Otherwise the conversion is
# emitted as a `List.to_string/1` call.
@doc false
def __compile_string__(rest, acc, context, _line, _offset, _count, type) when is_list(acc) do
  acc =
    for entry <- :lists.reverse(acc) do
      {:"::", [], [entry, type]}
    end

  {:{}, [], [rest, [{:<<>>, [], acc}], context]}
end

def __compile_string__(rest, acc, context, _line, _offset, _count, _type) do
  ast = quote(do: List.to_string(unquote(reverse_now_or_later(acc))))
  {:{}, [], [rest, [ast], context]}
end

# The two clauses differ only in the initial value of the fold: with a
# positive integer `min` the head is used as is, otherwise it is converted
# with `- ?0` like every other element.
# NOTE(review): presumably the head is already an integer when `min > 0`
# because the first `min` chars went through `__compile_integer__/6` -
# confirm against the callers of `min_max_compile_runtime_chars/6`.
@doc false
def __runtime_integer__(rest, acc, context, _line, _offset, min, _max)
    when is_integer(min) and min > 0 do
  ast =
    quote do
      [head | tail] = unquote(reverse_now_or_later(acc))
      [:lists.foldl(fn x, acc -> x - ?0 + acc * 10 end, head, tail)]
    end

  {:{}, [], [rest, ast, context]}
end

def __runtime_integer__(rest, acc, context, _line, _offset, _min, _max) do
  ast =
    quote do
      [head | tail] = unquote(reverse_now_or_later(acc))
      [:lists.foldl(fn x, acc -> x - ?0 + acc * 10 end, head - ?0, tail)]
    end

  {:{}, [], [rest, ast, context]}
end

# Emits a sum of `(digit - ?0) * weight` terms, where the first entry of
# `acc` gets weight 1, the second 10, and so on.
@doc false
def __compile_integer__(rest, acc, context, _line, _offset, _count) when is_list(acc) do
  ast =
    acc
    |> quoted_ascii_to_integer(1)
    |> Enum.reduce(&{:+, [], [&2, &1]})

  {:{}, [], [rest, [ast], context]}
end

# Reverses right away when given a list; otherwise emits the AST of a
# `:lists.reverse/1` call around the given expression.
defp reverse_now_or_later(list) when is_list(list), do: :lists.reverse(list)
defp reverse_now_or_later(expr), do: quote(do: :lists.reverse(unquote(expr)))

# Returns one quoted term per entry: `var - ?0` for weight 1 and
# `(var - ?0) * weight` afterwards, multiplying the weight by 10 each step.
defp quoted_ascii_to_integer([var | vars], 1) do
  [quote(do: unquote(var) - ?0) | quoted_ascii_to_integer(vars, 10)]
end

defp quoted_ascii_to_integer([var | vars], index) do
  [quote(do: (unquote(var) - ?0) * unquote(index)) | quoted_ascii_to_integer(vars, index * 10)]
end

defp quoted_ascii_to_integer([], _index) do
  []
end
end