compiler.ex (38470B)
1 defmodule NimbleParsec.Compiler do 2 @moduledoc false 3 @arity 6 4 5 @doc """ 6 Returns a parsec entrypoint named `name`. 7 """ 8 def entry_point(name) do 9 doc = """ 10 Parses the given `binary` as #{name}. 11 12 Returns `{:ok, [token], rest, context, position, byte_offset}` or 13 `{:error, reason, rest, context, line, byte_offset}` where `position` 14 describes the location of the #{name} (start position) as `{line, offset_to_start_of_line}`. 15 16 To column where the error occurred can be inferred from `byte_offset - offset_to_start_of_line`. 17 18 ## Options 19 20 * `:byte_offset` - the byte offset for the whole binary, defaults to 0 21 * `:line` - the line and the byte offset into that line, defaults to `{1, byte_offset}` 22 * `:context` - the initial context value. It will be converted to a map 23 """ 24 25 spec = 26 quote do 27 unquote(name)(binary, keyword) :: 28 {:ok, [term], rest, context, line, byte_offset} 29 | {:error, reason, rest, context, line, byte_offset} 30 when line: {pos_integer, byte_offset}, 31 byte_offset: pos_integer, 32 rest: binary, 33 reason: String.t(), 34 context: map 35 end 36 37 args = quote(do: [binary, opts \\ []]) 38 guards = quote(do: is_binary(binary)) 39 40 body = 41 quote do 42 context = Map.new(Keyword.get(opts, :context, [])) 43 byte_offset = Keyword.get(opts, :byte_offset, 0) 44 45 line = 46 case Keyword.get(opts, :line, 1) do 47 {_, _} = line -> line 48 line -> {line, byte_offset} 49 end 50 51 case unquote(:"#{name}__0")(binary, [], [], context, line, byte_offset) do 52 {:ok, acc, rest, context, line, offset} -> 53 {:ok, :lists.reverse(acc), rest, context, line, offset} 54 55 {:error, _, _, _, _, _} = error -> 56 error 57 end 58 end 59 60 {doc, spec, {name, args, guards, body}} 61 end 62 63 @doc """ 64 Compiles the given combinators into multiple definitions. 65 """ 66 def compile(name, [], _opts) do 67 raise ArgumentError, "cannot compile #{inspect(name)} with an empty parser combinator" 68 end 69 70 def compile(name, combinators, opts) when is_list(combinators) do 71 inline? = Keyword.get(opts, :inline, false) 72 {defs, inline} = compile(name, combinators) 73 74 if inline? do 75 {defs, inline} 76 else 77 {defs, []} 78 end 79 end 80 81 defp compile(name, combinators) do 82 config = %{ 83 acc_depth: 0, 84 catch_all: nil, 85 labels: [], 86 name: name, 87 replace: false 88 } 89 90 {next, step} = build_next(0, config) 91 92 {defs, inline, last, _step} = 93 combinators 94 |> Enum.reverse() 95 |> compile([], [], next, step, config) 96 97 {Enum.reverse([build_ok(last) | defs]), [{last, @arity} | inline]} 98 end 99 100 defp compile([], defs, inline, current, step, _config) do 101 {defs, inline, current, step} 102 end 103 104 defp compile([{:update, key, fun} | combinators], defs, inline, current, step, config) do 105 compile(combinators, defs, inline, current, step, Map.update!(config, key, fun)) 106 end 107 108 defp compile(combinators, defs, inline, current, step, config) do 109 {next_combinators, used_combinators, {new_defs, new_inline, next, step, catch_all}} = 110 case take_bound_combinators(combinators) do 111 {[combinator | combinators], [], [], [], [], _metadata} -> 112 case combinator do 113 {:label, label_combinators, label} -> 114 pre_combinators = [{:update, :labels, &[label | &1]} | label_combinators] 115 pos_combinators = [{:update, :labels, &tl(&1)} | combinators] 116 117 {pre_combinators ++ pos_combinators, [combinator], 118 {[], [], current, step, :catch_none}} 119 120 _ -> 121 {combinators, [combinator], 122 compile_unbound_combinator(combinator, current, step, config)} 123 end 124 125 {combinators, inputs, guards, outputs, acc, metadata} -> 126 {combinators, Enum.reverse(acc), 127 compile_bound_combinator(inputs, guards, outputs, metadata, current, step, config)} 128 end 129 130 catch_all_defs = 131 case catch_all do 132 :catch_all -> [build_catch_all(:positive, current, used_combinators, config)] 133 :catch_none -> [] 134 end 135 136 defs = catch_all_defs ++ Enum.reverse(new_defs) ++ defs 137 compile(next_combinators, defs, new_inline ++ inline, next, step, config) 138 end 139 140 ## Unbound combinators 141 142 defp compile_unbound_combinator({:parsec, parsec}, current, step, config) do 143 {next, step} = build_next(step, config) 144 head = quote(do: [rest, acc, stack, context, line, offset]) 145 146 catch_all = 147 case config do 148 %{catch_all: nil} -> 149 quote(do: error) 150 151 %{catch_all: catch_all, acc_depth: n} -> 152 {_, _, _, body} = build_proxy_to(current, catch_all, n) 153 body 154 end 155 156 call = 157 case parsec do 158 {mod, fun} -> 159 quote do 160 unquote(mod).unquote(:"#{fun}__0")(rest, acc, [], context, line, offset) 161 end 162 163 fun -> 164 quote do 165 unquote(:"#{fun}__0")(rest, acc, [], context, line, offset) 166 end 167 end 168 169 body = 170 quote do 171 case unquote(call) do 172 {:ok, acc, rest, context, line, offset} -> 173 unquote(next)(rest, acc, stack, context, line, offset) 174 175 {:error, _, _, _, _, _} = error -> 176 unquote(catch_all) 177 end 178 end 179 180 def = {current, head, true, body} 181 {[def], [{current, @arity}], next, step, :catch_none} 182 end 183 184 defp compile_unbound_combinator({:lookahead, combinators, kind}, current, step, config) do 185 choices = extract_choices_from_lookahead(combinators) 186 187 if Enum.all?(choices, &all_bound_combinators?/1) do 188 {next, step} = build_next(step, config) 189 args = quote(do: [rest, acc, stack, context, line, offset]) 190 success_body = {next, [], args} 191 {_, _, _, failure_body} = build_catch_all(kind, current, combinators, config) 192 193 {success_body, failure_body} = 194 if kind == :positive, do: {success_body, failure_body}, else: {failure_body, success_body} 195 196 defs = 197 for choice <- choices do 198 {[], inputs, guards, _, _, metadata} = take_bound_combinators(choice) 199 {bin, _} = compile_bound_bin_pattern(inputs, metadata, quote(do: _)) 200 head = quote(do: [unquote(bin) = rest, acc, stack, context, line, offset]) 201 guards = guards_list_to_quoted(guards) 202 {current, head, guards, success_body} 203 end 204 205 defs = if [] in choices, do: defs, else: defs ++ [{current, args, true, failure_body}] 206 {defs, [], next, step, :catch_none} 207 else 208 compile_unbound_lookahead(combinators, kind, current, step, config) 209 end 210 end 211 212 defp compile_unbound_combinator( 213 {:traverse, combinators, kind, traversal}, 214 current, 215 step, 216 config 217 ) do 218 fun = &traverse(traversal, &1, &2, &3, &4, &5, &6, config) 219 config = if kind == :constant, do: put_in(config.replace, true), else: config 220 compile_unbound_traverse(combinators, kind, current, step, config, fun) 221 end 222 223 defp compile_unbound_combinator({:times, combinators, count}, current, step, config) do 224 if all_no_context_combinators?(combinators) do 225 compile_bound_times(combinators, count, current, step, config) 226 else 227 compile_unbound_times(combinators, count, current, step, config) 228 end 229 end 230 231 defp compile_unbound_combinator({:repeat, combinators, while, _gen}, current, step, config) do 232 {failure, step} = build_next(step, config) 233 config = %{config | catch_all: failure, acc_depth: 0} 234 235 if all_no_context_combinators?(combinators) do 236 compile_bound_repeat(combinators, while, current, failure, step, config) 237 else 238 compile_unbound_repeat(combinators, while, current, failure, step, config) 239 end 240 end 241 242 defp compile_unbound_combinator({:eventually, combinators}, current, step, config) do 243 compile_eventually(combinators, current, step, config) 244 end 245 246 defp compile_unbound_combinator({:choice, choices, _} = combinator, current, step, config) do 247 config = 248 update_in(config.labels, fn 249 [] -> [label(combinator)] 250 other -> other 251 end) 252 253 if Enum.all?(choices, &all_bound_combinators?/1) do 254 compile_bound_choice(choices, current, step, config) 255 else 256 compile_unbound_choice(choices, current, step, config) 257 end 258 end 259 260 ## Lookahead 261 262 defp extract_choices_from_lookahead([{:choice, choices, _}]), do: choices 263 defp extract_choices_from_lookahead(other), do: [other] 264 265 defp compile_unbound_lookahead(combinators, kind, current, step, config) do 266 {_, _, _, catch_all} = build_catch_all(kind, current, combinators, config) 267 268 {next, step} = build_next(step, config) 269 head = quote(do: [rest, acc, stack, context, line, offset]) 270 271 args = 272 quote(do: [rest, [], [{rest, acc, context, line, offset} | stack], context, line, offset]) 273 274 body = {next, [], args} 275 entry_point = {current, head, true, body} 276 277 {failure, step} = build_next(step, config) 278 config = %{config | catch_all: failure, acc_depth: 0} 279 {defs, inline, success, step} = compile(combinators, [entry_point], [], next, step, config) 280 281 {next, step} = build_next(step, config) 282 head = quote(do: [_, _, [{rest, acc, context, line, offset} | stack], _, _, _]) 283 args = quote(do: [rest, acc, stack, context, line, offset]) 284 body = {next, [], args} 285 286 success_failure = 287 if kind == :positive do 288 [{success, head, true, body}, {failure, head, true, catch_all}] 289 else 290 [{failure, head, true, body}, {success, head, true, catch_all}] 291 end 292 293 inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline] 294 {Enum.reverse(success_failure ++ defs), inline, next, step, :catch_none} 295 end 296 297 ## Traverse 298 299 defp compile_unbound_traverse([], _kind, current, step, config, fun) do 300 {next, step} = build_next(step, config) 301 head = quote(do: [rest, acc, stack, context, line, offset]) 302 [rest, _, _, context, line, offset] = head 303 304 body = fun.(next, rest, [], context, line, offset) 305 def = {current, head, true, body} 306 {[def], [{current, @arity}], next, step, :catch_none} 307 end 308 309 defp compile_unbound_traverse(combinators, kind, current, step, config, fun) do 310 {next, step} = build_next(step, config) 311 head = quote(do: [rest, acc, stack, context, line, offset]) 312 313 args = 314 if kind == :pre do 315 quote(do: [rest, [], [{acc, line, offset} | stack], context, line, offset]) 316 else 317 quote(do: [rest, [], [acc | stack], context, line, offset]) 318 end 319 320 body = {next, [], args} 321 entry_point = {current, head, true, body} 322 323 config = update_in(config.acc_depth, &(&1 + 1)) 324 {defs, inline, last, step} = compile(combinators, [entry_point], [], next, step, config) 325 326 # Now we need to traverse the accumulator with the user code and 327 # concatenate with the previous accumulator at the top of the stack. 328 {next, step} = build_next(step, config) 329 330 {head, {traverse_line, traverse_offset}} = 331 if kind == :pre do 332 quote do 333 {[rest, user_acc, [{acc, stack_line, stack_offset} | stack], context, line, offset], 334 {stack_line, stack_offset}} 335 end 336 else 337 quote do 338 {[rest, user_acc, [acc | stack], context, line, offset], {line, offset}} 339 end 340 end 341 342 [rest, user_acc, _, context | _] = head 343 body = fun.(next, rest, user_acc, context, traverse_line, traverse_offset) 344 last_def = {last, head, true, body} 345 346 inline = [{current, @arity}, {last, @arity} | inline] 347 {Enum.reverse([last_def | defs]), inline, next, step, :catch_none} 348 end 349 350 defp traverse(_traversal, next, _, user_acc, _, _, _, %{replace: true}) do 351 quote do 352 _ = unquote(user_acc) 353 unquote(next)(rest, acc, stack, context, line, offset) 354 end 355 end 356 357 defp traverse(traversal, next, rest, user_acc, context, line, offset, _) do 358 case apply_traverse(traversal, rest, user_acc, context, line, offset) do 359 {:{}, _, [rest, expanded_acc, context]} -> 360 quote do 361 _ = unquote(user_acc) 362 363 unquote(next)( 364 unquote(rest), 365 unquote(expanded_acc) ++ acc, 366 stack, 367 unquote(context), 368 line, 369 offset 370 ) 371 end 372 373 {:error, reason} -> 374 quote do 375 {:error, unquote(reason), rest, context, line, offset} 376 end 377 378 quoted -> 379 quote generated: true do 380 case unquote(quoted) do 381 {rest, user_acc, context} when is_list(user_acc) -> 382 unquote(next)(rest, user_acc ++ acc, stack, context, line, offset) 383 384 {:error, reason} -> 385 {:error, reason, rest, context, line, offset} 386 end 387 end 388 end 389 end 390 391 defp apply_traverse(mfargs, rest, acc, context, line, offset) do 392 apply_traverse(Enum.reverse(mfargs), {:{}, [], [rest, acc, context]}, line, offset) 393 end 394 395 defp apply_traverse([mfargs | tail], {:{}, _, [rest, acc, context]}, line, offset) do 396 rest_acc_context = apply_traverse_mfa(mfargs, [rest, acc, context, line, offset], rest) 397 apply_traverse(tail, rest_acc_context, line, offset) 398 end 399 400 defp apply_traverse([], rest_acc_context, _line, _offset) do 401 rest_acc_context 402 end 403 404 defp apply_traverse(tail, rest_acc_context, line, offset) do 405 pattern = quote(do: {rest, acc, context}) 406 args = [quote(do: rest), quote(do: acc), quote(do: context), line, offset] 407 408 entries = 409 Enum.map(tail, fn mfargs -> 410 quote(do: unquote(pattern) <- unquote(apply_traverse_mfa(mfargs, args, quote(do: rest)))) 411 end) 412 413 quote do 414 with unquote(pattern) <- unquote(rest_acc_context), unquote_splicing(entries) do 415 {rest, acc, context} 416 end 417 end 418 end 419 420 defp apply_traverse_mfa(mfargs, args, rest) do 421 case apply_mfa(mfargs, args) do 422 {:{}, _, [_, _, _]} = res -> 423 res 424 425 {acc, context} when acc != :error -> 426 # IO.warn( 427 # "Returning a two-element tuple {acc, context} in pre_traverse/post_traverse is deprecated, " <> 428 # "please return {rest, acc, context} instead" 429 # ) 430 431 {:{}, [], [rest, acc, context]} 432 433 {:error, context} -> 434 {:error, context} 435 436 quoted -> 437 # TODO: Deprecate two element tuple return that is not error 438 quote generated: true do 439 case unquote(quoted) do 440 {_, _, _} = res -> res 441 {:error, reason} -> {:error, reason} 442 {acc, context} -> {unquote(rest), acc, context} 443 end 444 end 445 end 446 end 447 448 ## Eventually 449 450 defp compile_eventually(combinators, current, step, config) do 451 # First add the initial accumulator to the stack 452 {entrypoint, step} = build_next(step, config) 453 head = quote(do: [rest, acc, stack, context, line, offset]) 454 args = quote(do: [rest, acc, [acc | stack], context, line, offset]) 455 body = {entrypoint, [], args} 456 current_def = {current, head, true, body} 457 458 # Now define the failure point which will recur 459 {failure, step} = build_next(step, config) 460 failure_def = build_eventually_next_def(entrypoint, failure) 461 config = update_in(config.acc_depth, &(&1 + 1)) 462 catch_all_def = build_catch_all(:positive, failure, combinators, config) 463 464 # And compile down the inner combinators 465 config = %{config | catch_all: failure, acc_depth: 0} 466 {defs, inline, success, step} = compile(combinators, [], [], entrypoint, step, config) 467 468 # In the exit remove the accumulator from the stack 469 {exitpoint, step} = build_next(step, config) 470 head = quote(do: [rest, acc, [_ | stack], context, line, offset]) 471 args = quote(do: [rest, acc, stack, context, line, offset]) 472 body = {exitpoint, [], args} 473 success_def = {success, head, true, body} 474 475 defs = Enum.reverse(defs, [success_def, current_def, failure_def, catch_all_def]) 476 inline = [{success, @arity}, {current, @arity}, {failure, @arity} | inline] 477 {defs, inline, exitpoint, step, :catch_none} 478 end 479 480 defp build_eventually_next_def(entrypoint, failure) do 481 head = quote(do: [<<byte, rest::binary>>, _acc, [acc | _] = stack, context, line, offset]) 482 offset = add_offset(quote(do: offset), 1) 483 line = add_line(quote(do: line), offset, quote(do: byte)) 484 body = {entrypoint, [], quote(do: [rest, acc, stack, context]) ++ [line, offset]} 485 {failure, head, true, body} 486 end 487 488 ## Repeat 489 490 defp compile_bound_repeat(combinators, while, current, failure, step, config) do 491 {defs, recur, next, step} = 492 case apply_mfa(while, quote(do: [rest, context, line, offset])) do 493 {:cont, quote(do: context)} -> 494 {[], current, current, step} 495 496 quoted -> 497 {next, step} = build_next(step, config) 498 head = args = quote(do: [rest, acc, stack, context, line, offset]) 499 body = repeat_while(quoted, next, args, failure, args) 500 {[{current, head, true, body}], current, next, step} 501 end 502 503 {defs, inline, success, step} = compile(combinators, defs, [], next, step, config) 504 def = build_proxy_to(success, recur, 0) 505 {Enum.reverse([def | defs]), [{success, @arity} | inline], failure, step, :catch_none} 506 end 507 508 defp compile_unbound_repeat(combinators, while, current, failure, step, config) do 509 {recur, step} = build_next(step, config) 510 {defs, inline, success, step} = compile(combinators, [], [], recur, step, config) 511 512 {next, step} = build_next(step, config) 513 head = quote(do: [_, _, [{rest, acc, context, line, offset} | stack], _, _, _]) 514 args = quote(do: [rest, acc, stack, context, line, offset]) 515 body = {next, [], args} 516 failure_def = {failure, head, true, body} 517 518 while = apply_mfa(while, quote(do: [rest, context, line, offset])) 519 cont = quote(do: {rest, acc, context, line, offset}) 520 521 head = 522 quote do 523 [inner_rest, inner_acc, [unquote(cont) | stack], inner_context, inner_line, inner_offset] 524 end 525 526 cont = quote(do: {inner_rest, inner_acc ++ acc, inner_context, inner_line, inner_offset}) 527 528 true_args = 529 quote do 530 [inner_rest, [], [unquote(cont) | stack], inner_context, inner_line, inner_offset] 531 end 532 533 false_args = quote(do: [rest, acc, stack, context, line, offset]) 534 535 # We need to do this dance because of unused variables 536 body = 537 case compile_time_repeat_while(while) do 538 :cont -> 539 quote do 540 _ = {rest, acc, context, line, offset} 541 unquote({recur, [], true_args}) 542 end 543 544 :halt -> 545 quote do 546 _ = {inner_rest, inner_acc, inner_context, inner_line, inner_offset} 547 unquote({next, [], false_args}) 548 end 549 550 :none -> 551 repeat_while(while, recur, true_args, next, false_args) 552 end 553 554 success_def = {success, head, true, body} 555 head = quote(do: [rest, acc, stack, context, line, offset]) 556 557 true_args = 558 quote do 559 [rest, [], [{rest, acc, context, line, offset} | stack], context, line, offset] 560 end 561 562 false_args = quote(do: [rest, acc, stack, context, line, offset]) 563 body = repeat_while(while, recur, true_args, next, false_args) 564 current_def = {current, head, true, body} 565 566 defs = [current_def | Enum.reverse([success_def, failure_def | defs])] 567 inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline] 568 {defs, inline, next, step, :catch_none} 569 end 570 571 defp compile_time_repeat_while({:cont, quote(do: context)}), do: :cont 572 defp compile_time_repeat_while({:halt, quote(do: context)}), do: :halt 573 defp compile_time_repeat_while(_), do: :none 574 575 defp repeat_while(quoted, true_name, true_args, false_name, false_args) do 576 case compile_time_repeat_while(quoted) do 577 :cont -> 578 {true_name, [], true_args} 579 580 :halt -> 581 {false_name, [], false_args} 582 583 :none -> 584 quote do 585 case unquote(quoted) do 586 {:cont, context} -> unquote({true_name, [], true_args}) 587 {:halt, context} -> unquote({false_name, [], false_args}) 588 end 589 end 590 end 591 end 592 593 ## Repeat up to 594 595 defp compile_bound_times(combinators, count, current, step, config) do 596 {failure, step} = build_next(step, config) 597 {recur, step} = build_next(step, config) 598 599 head = quote(do: [rest, acc, stack, context, line, offset]) 600 args = quote(do: [rest, acc, [unquote(count) | stack], context, line, offset]) 601 body = {recur, [], args} 602 current_def = {current, head, true, body} 603 604 config = %{config | catch_all: failure, acc_depth: 0} 605 {defs, inline, success, step} = compile(combinators, [current_def], [], recur, step, config) 606 607 {next, step} = build_next(step, config) 608 head = quote(do: [rest, acc, [1 | stack], context, line, offset]) 609 args = quote(do: [rest, acc, stack, context, line, offset]) 610 body = {next, [], args} 611 success_def0 = {success, head, true, body} 612 613 head = quote(do: [rest, acc, [count | stack], context, line, offset]) 614 args = quote(do: [rest, acc, [count - 1 | stack], context, line, offset]) 615 body = {recur, [], args} 616 success_def1 = {success, head, true, body} 617 618 head = quote(do: [rest, acc, [_ | stack], context, line, offset]) 619 args = quote(do: [rest, acc, stack, context, line, offset]) 620 body = {next, [], args} 621 failure_def = {failure, head, true, body} 622 623 defs = Enum.reverse([success_def1, success_def0, failure_def | defs]) 624 inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline] 625 {defs, inline, next, step, :catch_none} 626 end 627 628 defp compile_unbound_times(combinators, count, current, step, config) do 629 {failure, step} = build_next(step, config) 630 {recur, step} = build_next(step, config) 631 632 head = quote(do: [rest, acc, stack, context, line, offset]) 633 cont = quote(do: {unquote(count), rest, acc, context, line, offset}) 634 args = quote(do: [rest, [], [unquote(cont) | stack], context, line, offset]) 635 body = {recur, [], args} 636 current_def = {current, head, true, body} 637 638 config = %{config | catch_all: failure, acc_depth: 0} 639 {defs, inline, success, step} = compile(combinators, [current_def], [], recur, step, config) 640 641 {next, step} = build_next(step, config) 642 head = quote(do: [rest, user_acc, [{1, _, acc, _, _, _} | stack], context, line, offset]) 643 args = quote(do: [rest, user_acc ++ acc, stack, context, line, offset]) 644 body = {next, [], args} 645 success_def0 = {success, head, true, body} 646 647 head = quote(do: [rest, user_acc, [{count, _, acc, _, _, _} | stack], context, line, offset]) 648 cont = quote(do: {count - 1, rest, user_acc ++ acc, context, line, offset}) 649 args = quote(do: [rest, [], [unquote(cont) | stack], context, line, offset]) 650 body = {recur, [], args} 651 success_def1 = {success, head, true, body} 652 653 head = quote(do: [_, _, [{_, rest, acc, context, line, offset} | stack], _, _, _]) 654 args = quote(do: [rest, acc, stack, context, line, offset]) 655 body = {next, [], args} 656 failure_def = {failure, head, true, body} 657 658 defs = Enum.reverse([success_def1, success_def0, failure_def | defs]) 659 inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline] 660 {defs, inline, next, step, :catch_none} 661 end 662 663 ## Choice 664 665 defp compile_bound_choice(choices, current, step, config) do 666 {next_name, next_step} = build_next(step, config) 667 668 defs = 669 for choice <- choices do 670 {[], inputs, guards, outputs, _, metadata} = take_bound_combinators(choice) 671 672 {[def], [], ^next_name, ^next_step, _} = 673 compile_bound_combinator(inputs, guards, outputs, metadata, current, step, config) 674 675 def 676 end 677 678 catch_all = if [] in choices, do: :catch_none, else: :catch_all 679 {defs, [], next_name, next_step, catch_all} 680 end 681 682 defp compile_unbound_choice(choices, current, step, config) do 683 {done, step} = build_next(step, config) 684 685 # We process choices in reverse order. The last order does not 686 # have any fallback besides the requirement to drop the stack 687 # this allows us to compose with repeat and traverse. 688 config = update_in(config.acc_depth, &(&1 + 2)) 689 690 {first, defs, inline, step} = 691 compile_unbound_choice(Enum.reverse(choices), [], [], :unused, step, done, config) 692 693 head = quote(do: [rest, acc, stack, context, line, offset]) 694 cont = quote(do: {rest, context, line, offset}) 695 args = quote(do: [rest, [], [unquote(cont), acc | stack], context, line, offset]) 696 body = {first, [], args} 697 def = {current, head, true, body} 698 699 {[def | Enum.reverse(defs)], [{current, @arity} | inline], done, step, :catch_none} 700 end 701 702 defp compile_unbound_choice([], defs, inline, previous, step, _success, _config) do 703 # Discard the last failure definition that won't be used. 704 {previous, tl(defs), tl(inline), step - 1} 705 end 706 707 defp compile_unbound_choice([choice | choices], defs, inline, _previous, step, done, config) do 708 {current, step} = build_next(step, config) 709 {defs, inline, success, step} = compile(choice, defs, inline, current, step, config) 710 711 head = quote(do: [rest, acc, [_, previous_acc | stack], context, line, offset]) 712 args = quote(do: [rest, acc ++ previous_acc, stack, context, line, offset]) 713 body = {done, [], args} 714 success_def = {success, head, true, body} 715 716 {failure, step} = build_next(step, config) 717 head = quote(do: [_, _, [{rest, context, line, offset} | _] = stack, _, _, _]) 718 args = quote(do: [rest, [], stack, context, line, offset]) 719 body = {current, [], args} 720 failure_def = {failure, head, true, body} 721 722 defs = [failure_def, success_def | defs] 723 inline = [{failure, @arity}, {success, @arity} | inline] 724 config = %{config | catch_all: failure, acc_depth: 0} 725 compile_unbound_choice(choices, defs, inline, current, step, done, config) 726 end 727 728 ## No context combinators 729 730 # If a combinator does not need a context, i.e. it cannot abort 731 # in the middle, then we can compile to an optimized version of 732 # repeat and times. 733 # 734 # For example, a lookahead at the beginning doesn't need a context. 735 # A choice that is bound doesn't need one either. 736 defp all_no_context_combinators?([{:lookahead, look_combinators, _} | combinators]) do 737 all_bound_combinators?(look_combinators) and 738 all_no_context_combinators_next?(combinators) 739 end 740 741 defp all_no_context_combinators?(combinators) do 742 all_no_context_combinators_next?(combinators) 743 end 744 745 defp all_no_context_combinators_next?([{:choice, choice_combinators, _} | combinators]) do 746 all_bound_combinators?(choice_combinators) and 747 all_no_context_combinators_next?(combinators) 748 end 749 750 defp all_no_context_combinators_next?(combinators) do 751 all_bound_combinators?(combinators) 752 end 753 754 ## Bound combinators 755 756 # A bound combinator is a combinator where the number of inputs, guards, 757 # outputs, line and offset shifts are known at compilation time. We inline 758 # those bound combinators into a single bitstring pattern for performance. 759 # Currently error reporting will accuse the beginning of the bound combinator 760 # in case of errors but such can be addressed if desired. 761 762 defp compile_bound_combinator(inputs, guards, outputs, metadata, current, step, config) do 763 %{line: line, offset: offset} = metadata 764 {next, step} = build_next(step, config) 765 {bin, rest} = compile_bound_bin_pattern(inputs, metadata, quote(do: rest)) 766 767 acc = if config.replace, do: quote(do: acc), else: quote(do: unquote(outputs) ++ acc) 768 769 args = 770 quote(do: [unquote(rest), unquote(acc), stack, context, unquote(line), unquote(offset)]) 771 772 head = quote(do: [unquote(bin), acc, stack, context, comb__line, comb__offset]) 773 body = {next, [], args} 774 775 guards = guards_list_to_quoted(guards) 776 def = {current, head, guards, body} 777 {[def], [], next, step, :catch_all} 778 end 779 780 defp compile_bound_bin_pattern(inputs, %{eos: eos?}, var) do 781 rest = if eos?, do: "", else: var 782 bin = {:<<>>, [], inputs ++ [quote(do: unquote(rest) :: binary)]} 783 {bin, rest} 784 end 785 786 defp all_bound_combinators?(combinators) do 787 match?({[], _, _, _, _, _}, take_bound_combinators(combinators)) 788 end 789 790 defp take_bound_combinators(combinators) do 791 {line, offset} = line_offset_pair() 792 metadata = %{eos: false, line: line, offset: offset, counter: 0} 793 take_bound_combinators(combinators, [], [], [], [], metadata) 794 end 795 796 defp take_bound_combinators([:eos | combinators], inputs, guards, outputs, acc, metadata) do 797 combinators = Enum.drop_while(combinators, &(&1 == :eos)) 798 {combinators, inputs, guards, outputs, [:eos | acc], %{metadata | eos: true}} 799 end 800 801 defp take_bound_combinators(combinators, inputs, guards, outputs, acc, metadata) do 802 with [combinator | combinators] <- combinators, 803 {:ok, new_inputs, new_guards, new_outputs, metadata} <- 804 bound_combinator(combinator, metadata) do 805 take_bound_combinators( 806 combinators, 807 inputs ++ new_inputs, 808 guards ++ new_guards, 809 merge_output(new_outputs, outputs), 810 [combinator | acc], 811 metadata 812 ) 813 else 814 _ -> 815 {combinators, inputs, guards, outputs, acc, metadata} 816 end 817 end 818 819 defp merge_output(left, right) when is_list(left) and is_list(right), do: left ++ right 820 defp merge_output(left, right), do: quote(do: unquote(left) ++ unquote(right)) 821 822 defp bound_combinator({:string, string}, %{line: line, offset: offset} = metadata) do 823 size = byte_size(string) 824 825 line = 826 case String.split(string, "\n") do 827 [_] -> 828 line 829 830 [_ | _] = many -> 831 last_size = many |> List.last() |> byte_size() 832 line_offset = add_offset(offset, size - last_size) 833 834 quote do 835 {elem(unquote(line), 0) + unquote(length(many) - 1), unquote(line_offset)} 836 end 837 end 838 839 offset = add_offset(offset, size) 840 {:ok, [string], [], [string], %{metadata | line: line, offset: offset}} 841 end 842 843 defp bound_combinator({:bin_segment, inclusive, exclusive, modifier}, metadata) do 844 %{line: line, offset: offset, counter: counter} = metadata 845 846 {var, counter} = build_var(counter) 847 input = apply_bin_modifier(var, modifier) 848 guards = compile_bin_ranges(var, inclusive, exclusive) 849 850 offset = 851 if modifier == :integer do 852 add_offset(offset, 1) 853 else 854 add_offset(offset, quote(do: byte_size(<<unquote(input)>>))) 855 end 856 857 line = 858 if newline_allowed?(inclusive) and not newline_forbidden?(exclusive) do 859 add_line(line, offset, var) 860 else 861 line 862 end 863 864 metadata = %{metadata | line: line, offset: offset, counter: counter} 865 {:ok, [input], guards, [var], metadata} 866 end 867 868 defp bound_combinator({:label, combinators, _labels}, metadata) do 869 case take_bound_combinators(combinators, [], [], [], [], metadata) do 870 {[], inputs, guards, outputs, _, metadata} -> 871 {:ok, inputs, guards, outputs, metadata} 872 873 {_, _, _, _, _, _} -> 874 :error 875 end 876 end 877 878 defp bound_combinator({:traverse, combinators, kind, mfargs}, pre_metadata) do 879 case take_bound_combinators(combinators, [], [], [], [], pre_metadata) do 880 {[], inputs, guards, outputs, _, post_metadata} -> 881 {rest, context} = quote(do: {rest, context}) 882 {traverse_line, traverse_offset} = pre_post_traverse(kind, pre_metadata, post_metadata) 883 884 case apply_traverse(mfargs, rest, outputs, context, traverse_line, traverse_offset) do 885 {:{}, _, [^rest, outputs, ^context]} when outputs != :error -> 886 {:ok, inputs, guards, outputs, post_metadata} 887 888 _ -> 889 :error 890 end 891 892 {_, _, _, _, _, _} -> 893 :error 894 end 895 end 896 897 defp bound_combinator(_, _) do 898 :error 899 end 900 901 ## Line and offset handling 902 903 # For pre traversal returns the AST before, otherwise the AST after 904 # for post. For constant, line/offset are never used. 905 defp pre_post_traverse(:pre, %{line: line, offset: offset}, _), do: {line, offset} 906 defp pre_post_traverse(_, _, %{line: line, offset: offset}), do: {line, offset} 907 908 defp line_offset_pair() do 909 quote(do: {comb__line, comb__offset}) 910 end 911 912 defp add_offset({:+, _, [var, current]}, extra) 913 when is_integer(current) and is_integer(extra) do 914 {:+, [], [var, current + extra]} 915 end 916 917 defp add_offset(var, extra) do 918 {:+, [], [var, extra]} 919 end 920 921 defp newline_allowed?([]), do: true 922 923 defp newline_allowed?(ors) do 924 Enum.any?(ors, fn 925 _.._ = range -> ?\n in range 926 codepoint -> ?\n === codepoint 927 end) 928 end 929 930 defp newline_forbidden?([]), do: false 931 932 defp newline_forbidden?(ands) do 933 Enum.any?(ands, fn 934 {:not, _.._ = range} -> ?\n in range 935 {:not, codepoint} -> ?\n === codepoint 936 end) 937 end 938 939 defp add_line(line, offset, var) do 940 quote do 941 line = unquote(line) 942 943 case unquote(var) do 944 ?\n -> {elem(line, 0) + 1, unquote(offset)} 945 _ -> line 946 end 947 end 948 end 949 950 ## Label 951 952 defp labels([]) do 953 "nothing" 954 end 955 956 defp labels(combinators) do 957 Enum.map_join(combinators, ", followed by ", &label/1) 958 end 959 960 defp label({:string, binary}) do 961 "string #{inspect(binary)}" 962 end 963 964 defp label({:label, _combinator, label}) do 965 label 966 end 967 968 defp label({:bin_segment, inclusive, exclusive, modifier}) do 969 {inclusive, printable?} = Enum.map_reduce(inclusive, true, &inspect_bin_range(&1, &2)) 970 971 {exclusive, printable?} = 972 Enum.map_reduce(exclusive, printable?, &inspect_bin_range(elem(&1, 1), &2)) 973 974 prefix = 975 cond do 976 modifier == :integer and not printable? -> "byte" 977 modifier == :integer -> "ASCII character" 978 modifier == :utf8 -> "utf8 codepoint" 979 modifier == :utf16 -> "utf16 codepoint" 980 modifier == :utf32 -> "utf32 codepoint" 981 end 982 983 prefix <> Enum.join([Enum.join(inclusive, " or") | exclusive], ", and not") 984 end 985 986 defp label(:eos) do 987 "end of string" 988 end 989 990 defp label({:lookahead, combinators, _}) do 991 labels(combinators) 992 end 993 994 defp label({:repeat, combinators, _, _}) do 995 labels(combinators) 996 end 997 998 defp label({:eventually, combinators}) do 999 labels(combinators) <> " eventually" 1000 end 1001 1002 defp label({:times, combinators, _}) do 1003 labels(combinators) 1004 end 1005 1006 defp label({:choice, choices, _}) do 1007 Enum.map_join(choices, " or ", &labels/1) 1008 end 1009 1010 defp label({:traverse, combinators, _, _}) do 1011 labels(combinators) 1012 end 1013 1014 defp label({:parsec, {_module, function}}) do 1015 Atom.to_string(function) 1016 end 1017 1018 defp label({:parsec, name}) do 1019 Atom.to_string(name) 1020 end 1021 1022 ## Bin segments 1023 1024 defp compile_bin_ranges(var, ors, ands) do 1025 ands = Enum.map(ands, &bin_range_to_guard(var, &1)) 1026 1027 if ors == [] do 1028 ands 1029 else 1030 ors = 1031 ors 1032 |> Enum.map(&bin_range_to_guard(var, &1)) 1033 |> Enum.reduce(&{:or, [], [&2, &1]}) 1034 1035 [ors | ands] 1036 end 1037 end 1038 1039 defp bin_range_to_guard(var, range) do 1040 case range do 1041 min..max when min < max -> 1042 quote(do: unquote(var) >= unquote(min) and unquote(var) <= unquote(max)) 1043 1044 min..max when min > max -> 1045 quote(do: unquote(var) >= unquote(max) and unquote(var) <= unquote(min)) 1046 1047 min..min -> 1048 quote(do: unquote(var) === unquote(min)) 1049 1050 min when is_integer(min) -> 1051 quote(do: unquote(var) === unquote(min)) 1052 1053 {:not, min..max} when min < max -> 1054 quote(do: unquote(var) < unquote(min) or unquote(var) > unquote(max)) 1055 1056 {:not, min..max} when min > max -> 1057 quote(do: unquote(var) < unquote(max) or unquote(var) > unquote(min)) 1058 1059 {:not, min..min} -> 1060 quote(do: unquote(var) !== unquote(min)) 1061 1062 {:not, min} when is_integer(min) -> 1063 quote(do: unquote(var) !== unquote(min)) 1064 end 1065 end 1066 1067 defp inspect_bin_range(min..max, printable?) do 1068 {" in the range #{inspect_char(min)} to #{inspect_char(max)}", 1069 printable? and printable?(min) and printable?(max)} 1070 end 1071 1072 defp inspect_bin_range(min, printable?) do 1073 {" equal to #{inspect_char(min)}", printable? and printable?(min)} 1074 end 1075 1076 defp printable?(codepoint), do: List.ascii_printable?([codepoint]) 1077 defp inspect_char(codepoint), do: inspect([codepoint], charlists: :as_charlists) 1078 1079 defp apply_bin_modifier(expr, :integer), do: expr 1080 1081 defp apply_bin_modifier(expr, modifier) do 1082 {:"::", [], [expr, Macro.var(modifier, __MODULE__)]} 1083 end 1084 1085 ## Helpers 1086 1087 defp apply_mfa({mod, fun, args}, extra) do 1088 apply(mod, fun, extra ++ args) 1089 end 1090 1091 defp guards_list_to_quoted([]), do: true 1092 defp guards_list_to_quoted(guards), do: Enum.reduce(guards, &{:and, [], [&2, &1]}) 1093 1094 defp build_var(counter) do 1095 {{:"x#{counter}", [], __MODULE__}, counter + 1} 1096 end 1097 1098 defp build_next(step, %{name: name}) do 1099 {:"#{name}__#{step}", step + 1} 1100 end 1101 1102 defp build_ok(current) do 1103 head = quote(do: [rest, acc, _stack, context, line, offset]) 1104 body = quote(do: {:ok, acc, rest, context, line, offset}) 1105 {current, head, true, body} 1106 end 1107 1108 defp build_catch_all(kind, name, combinators, %{catch_all: nil, labels: labels}) do 1109 reason = error_reason(combinators, labels) 1110 reason = if kind == :positive, do: "expected " <> reason, else: "did not expect " <> reason 1111 args = quote(do: [rest, _acc, _stack, context, line, offset]) 1112 body = quote(do: {:error, unquote(reason), rest, context, line, offset}) 1113 {name, args, true, body} 1114 end 1115 1116 defp build_catch_all(_kind, name, _combinators, %{catch_all: next, acc_depth: n}) do 1117 build_proxy_to(name, next, n) 1118 end 1119 1120 defp build_acc_depth(1, acc, stack), do: [{:|, [], [acc, stack]}] 1121 defp build_acc_depth(n, acc, stack), do: [quote(do: _) | build_acc_depth(n - 1, acc, stack)] 1122 1123 defp build_proxy_to(name, next, 0) do 1124 args = quote(do: [rest, acc, stack, context, line, offset]) 1125 body = {next, [], args} 1126 {name, args, true, body} 1127 end 1128 1129 defp build_proxy_to(name, next, n) do 1130 args = quote(do: [rest, _acc, stack, context, line, offset]) 1131 {acc, stack} = quote(do: {acc, stack}) 1132 1133 body = 1134 quote do 1135 unquote(build_acc_depth(n, acc, stack)) = stack 1136 unquote(next)(rest, acc, stack, context, line, offset) 1137 end 1138 1139 {name, args, true, body} 1140 end 1141 1142 defp error_reason(combinators, []) do 1143 labels(combinators) 1144 end 1145 1146 defp error_reason(_combinators, [head]) do 1147 head 1148 end 1149 1150 defp error_reason(_combinators, [head | tail]) do 1151 "#{head} while processing #{Enum.join(tail, " inside ")}" 1152 end 1153 end