heredocs.ex (15705B)
defmodule Credo.Code.Heredocs do
  @moduledoc """
  This module lets you strip heredocs from source code.

  The source is walked by a small hand-written state machine. Each state is a
  group of private function clauses (`parse_code/4`, `parse_charlist/4`,
  `parse_comment/4`, `parse_string_literal/4`, `parse_heredoc/7`, ...) that
  consume the remaining input and append to an accumulator binary.
  """

  alias Credo.Code.InterpolationHelper
  alias Credo.SourceFile

  alphabet = ~w(a b c d e f g h i j k l m n o p q r s t u v w x y z)

  # Opening/closing delimiter pairs of single-line ("normal") sigils.
  sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"/", "/"},
    {"\"", "\""},
    {"'", "'"}
  ]

  all_sigil_chars =
    Enum.flat_map(alphabet, fn a ->
      [a, String.upcase(a)]
    end)

  all_sigil_starts = Enum.map(all_sigil_chars, fn c -> "~#{c}" end)

  # `{opening, closing}` for every sigil letter combined with every delimiter,
  # e.g. `{"~s(", ")"}`. The content of these sigils is kept.
  non_removable_normal_sigils =
    for {b, e} <- sigil_delimiters, start <- all_sigil_starts do
      {"#{start}#{b}", e}
    end

  non_removable_normal_sigil_ends = Enum.map(sigil_delimiters, &elem(&1, 1))

  removable_heredoc_sigil_delimiters = [
    {"\"\"\"", "\"\"\""},
    {"'''", "'''"}
  ]

  # `{opening, closing}` for every sigil letter combined with a heredoc
  # delimiter, e.g. `{"~S\"\"\"", "\"\"\""}`. The content of these is replaced.
  removable_heredoc_sigils =
    for {b, e} <- removable_heredoc_sigil_delimiters, start <- all_sigil_starts do
      {"#{start}#{b}", e}
    end

  removable_heredoc_sigil_ends = Enum.map(removable_heredoc_sigil_delimiters, &elem(&1, 1))

  @doc """
  Replaces all characters inside heredocs
  with the equivalent amount of white-space.

  ## Parameters

    * `source_file` - passed, together with `filename`, to
      `Credo.SourceFile.source_and_filename/2` to obtain the source text.
    * `replacement` - appended once for every character consumed inside a
      heredoc (default: a single space).
    * `interpolation_replacement` - passed to
      `Credo.Code.InterpolationHelper.replace_interpolations/3`, which runs
      before the heredocs are processed.
    * `empty_line_replacement` - appended before the newline of an empty line
      inside a heredoc (default: the empty string).
    * `filename` - fallback filename (default: `"nofilename"`).

  Returns the processed source as a binary.
  """
  def replace_with_spaces(
        source_file,
        replacement \\ " ",
        interpolation_replacement \\ " ",
        empty_line_replacement \\ "",
        filename \\ "nofilename"
      ) do
    {source, filename} = SourceFile.source_and_filename(source_file, filename)

    source
    |> InterpolationHelper.replace_interpolations(interpolation_replacement, filename)
    |> parse_code("", replacement, empty_line_replacement)
  end

  #
  # Code (the default state)
  #

  defp parse_code("", acc, _replacement, _empty_line_replacement) do
    acc
  end

  # Heredoc sigils must be matched before the normal sigils below, otherwise
  # the first quote of a heredoc delimiter would be taken for a sigil delimiter.
  for {sigil_start, sigil_end} <- removable_heredoc_sigils do
    defp parse_code(
           <<unquote(sigil_start)::utf8, t::binary>>,
           acc,
           replacement,
           empty_line_replacement
         ) do
      new_acc = acc <> unquote(sigil_start)

      # The byte size of the accumulator marks where the heredoc content
      # starts, so that the indentation can be re-padded once it is closed.
      parse_removable_heredoc_sigil(
        t,
        new_acc,
        unquote(sigil_end),
        replacement,
        empty_line_replacement,
        "",
        byte_size(new_acc)
      )
    end
  end

  for {sigil_start, sigil_end} <- non_removable_normal_sigils do
    defp parse_code(
           <<unquote(sigil_start)::utf8, t::binary>>,
           acc,
           replacement,
           empty_line_replacement
         ) do
      parse_non_removable_normal_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement,
        empty_line_replacement
      )
    end
  end

  defp parse_code(<<"\"\"\""::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    new_acc = acc <> ~s(""")

    parse_heredoc(
      t,
      new_acc,
      replacement,
      empty_line_replacement,
      ~s("""),
      "",
      byte_size(new_acc)
    )
  end

  defp parse_code(<<"\'\'\'"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    new_acc = acc <> ~s(''')

    parse_heredoc(
      t,
      new_acc,
      replacement,
      empty_line_replacement,
      ~s('''),
      "",
      byte_size(new_acc)
    )
  end

  # A backslash followed by a double quote does not open a string literal.
  defp parse_code(<<"\\\""::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> "\\\"", replacement, empty_line_replacement)
  end

  defp parse_code(<<"#"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_comment(t, acc <> "#", replacement, empty_line_replacement)
  end

  # The character literals `?"` and `?'` do not open a string or charlist.
  defp parse_code(<<"?\""::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> "?\"", replacement, empty_line_replacement)
  end

  defp parse_code(<<"?'"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> "?\'", replacement, empty_line_replacement)
  end

  defp parse_code(<<"'"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_charlist(t, acc <> "'", replacement, empty_line_replacement)
  end

  defp parse_code(<<"\""::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_string_literal(t, acc <> "\"", replacement, empty_line_replacement)
  end

  defp parse_code(<<h::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> <<h::utf8>>, replacement, empty_line_replacement)
  end

  # Reached only when the input does not start with a valid UTF-8 codepoint.
  defp parse_code(str, acc, replacement, empty_line_replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_code(t, acc <> h, replacement, empty_line_replacement)
  end

  #
  # Charlists
  #

  defp parse_charlist("", acc, _replacement, _empty_line_replacement) do
    acc
  end

  defp parse_charlist(<<"\\\\"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_charlist(t, acc <> "\\\\", replacement, empty_line_replacement)
  end

  defp parse_charlist(<<"\\\'"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_charlist(t, acc <> "\\\'", replacement, empty_line_replacement)
  end

  defp parse_charlist(<<"\'"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> "'", replacement, empty_line_replacement)
  end

  defp parse_charlist(<<"\n"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_charlist(t, acc <> "\n", replacement, empty_line_replacement)
  end

  # Any other character is copied and the charlist continues until its closing
  # quote, mirroring `parse_string_literal/4`. (This clause used to continue in
  # `parse_comment/4`, which treated the rest of the line as a comment and
  # missed heredocs opened after a charlist on the same line.)
  defp parse_charlist(str, acc, replacement, empty_line_replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_charlist(t, acc <> h, replacement, empty_line_replacement)
  end

  #
  # Comments
  #

  defp parse_comment("", acc, _replacement, _empty_line_replacement) do
    acc
  end

  # A comment ends at the end of the line.
  defp parse_comment(<<"\n"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> "\n", replacement, empty_line_replacement)
  end

  defp parse_comment(str, acc, replacement, empty_line_replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_comment(t, acc <> h, replacement, empty_line_replacement)
  end

  #
  # "Normal" Sigils (e.g. `~S"..."` or `~s(...)`)
  #

  for sigil_end <- non_removable_normal_sigil_ends do
    defp parse_non_removable_normal_sigil(
           "",
           acc,
           unquote(sigil_end),
           _replacement,
           _empty_line_replacement
         ) do
      acc
    end

    # NOTE(review): the two consumed backslashes are not appended to `acc`, so
    # the output is two characters shorter here - confirm this is intended.
    defp parse_non_removable_normal_sigil(
           <<"\\\\"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement
         ) do
      parse_non_removable_normal_sigil(
        t,
        acc,
        unquote(sigil_end),
        replacement,
        empty_line_replacement
      )
    end

    # An escaped closing delimiter does not end the sigil; its two characters
    # are emitted as two replacements.
    defp parse_non_removable_normal_sigil(
           <<unquote("\\#{sigil_end}")::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement
         ) do
      parse_non_removable_normal_sigil(
        t,
        acc <> replacement <> replacement,
        unquote(sigil_end),
        replacement,
        empty_line_replacement
      )
    end

    defp parse_non_removable_normal_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement
         ) do
      parse_code(t, acc <> unquote(sigil_end), replacement, empty_line_replacement)
    end

    defp parse_non_removable_normal_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement
         ) do
      parse_non_removable_normal_sigil(
        t,
        acc <> "\n",
        unquote(sigil_end),
        replacement,
        empty_line_replacement
      )
    end

    defp parse_non_removable_normal_sigil(
           str,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement
         ) do
      {h, t} = String.next_codepoint(str)

      parse_non_removable_normal_sigil(
        t,
        acc <> h,
        unquote(sigil_end),
        replacement,
        empty_line_replacement
      )
    end
  end

  #
  # Removable Sigils (e.g. `~S"""`)
  #

  for sigil_end <- removable_heredoc_sigil_ends do
    defp parse_removable_heredoc_sigil(
           "",
           acc,
           unquote(sigil_end),
           _replacement,
           _empty_line_replacement,
           _current_line,
           _byte_index_heredoc_start
         ) do
      acc
    end

    # NOTE(review): the two consumed backslashes are dropped without emitting a
    # replacement, so the line gets shorter - confirm this is intended.
    defp parse_removable_heredoc_sigil(
           <<"\\\\"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement,
           current_line,
           byte_index_heredoc_start
         ) do
      parse_removable_heredoc_sigil(
        t,
        acc,
        unquote(sigil_end),
        replacement,
        empty_line_replacement,
        current_line,
        byte_index_heredoc_start
      )
    end

    # A backslash in front of the closing delimiter keeps the heredoc open.
    defp parse_removable_heredoc_sigil(
           <<unquote("\\#{sigil_end}")::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement,
           current_line,
           byte_index_heredoc_start
         ) do
      parse_removable_heredoc_sigil(
        t,
        acc <> replacement <> replacement,
        unquote(sigil_end),
        replacement,
        empty_line_replacement,
        current_line <> replacement <> replacement,
        byte_index_heredoc_start
      )
    end

    defp parse_removable_heredoc_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement,
           current_line,
           byte_index_heredoc_start
         ) do
      acc = pad_replaced_heredoc(acc, unquote(sigil_end), current_line, byte_index_heredoc_start)

      parse_code(t, acc <> unquote(sigil_end), replacement, empty_line_replacement)
    end

    defp parse_removable_heredoc_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement,
           current_line,
           byte_index_heredoc_start
         ) do
      # `current_line` is still just "\n" when the line that ends here was empty.
      acc =
        if current_line == "\n" do
          acc <> empty_line_replacement
        else
          acc
        end

      parse_removable_heredoc_sigil(
        t,
        acc <> "\n",
        unquote(sigil_end),
        replacement,
        empty_line_replacement,
        "\n",
        byte_index_heredoc_start
      )
    end

    defp parse_removable_heredoc_sigil(
           <<_::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement,
           empty_line_replacement,
           current_line,
           byte_index_heredoc_start
         ) do
      parse_removable_heredoc_sigil(
        t,
        acc <> replacement,
        unquote(sigil_end),
        replacement,
        empty_line_replacement,
        current_line <> replacement,
        byte_index_heredoc_start
      )
    end
  end

  #
  # Heredocs
  #

  defp parse_heredoc(
         "",
         acc,
         _replacement,
         _empty_line_replacement,
         _here_doc_delimiter,
         _current_line,
         _byte_index_heredoc_start
       ) do
    acc
  end

  # NOTE(review): this clause and the next one consume an escape sequence
  # without emitting a replacement, so the line gets two characters shorter -
  # confirm this is intended.
  defp parse_heredoc(
         <<"\\\\"::utf8, t::binary>>,
         acc,
         replacement,
         empty_line_replacement,
         here_doc_delimiter,
         current_line,
         byte_index_heredoc_start
       ) do
    parse_heredoc(
      t,
      acc,
      replacement,
      empty_line_replacement,
      here_doc_delimiter,
      current_line,
      byte_index_heredoc_start
    )
  end

  defp parse_heredoc(
         <<"\\\""::utf8, t::binary>>,
         acc,
         replacement,
         empty_line_replacement,
         here_doc_delimiter,
         current_line,
         byte_index_heredoc_start
       ) do
    parse_heredoc(
      t,
      acc,
      replacement,
      empty_line_replacement,
      here_doc_delimiter,
      current_line,
      byte_index_heredoc_start
    )
  end

  # A heredoc is only closed by the same delimiter that opened it.
  defp parse_heredoc(
         <<"\"\"\""::utf8, t::binary>>,
         acc,
         replacement,
         empty_line_replacement,
         "\"\"\"",
         current_line,
         byte_index_heredoc_start
       ) do
    acc = pad_replaced_heredoc(acc, ~s("""), current_line, byte_index_heredoc_start)

    parse_code(t, acc <> ~s("""), replacement, empty_line_replacement)
  end

  defp parse_heredoc(
         <<"\'\'\'"::utf8, t::binary>>,
         acc,
         replacement,
         empty_line_replacement,
         "\'\'\'",
         current_line,
         byte_index_heredoc_start
       ) do
    acc = pad_replaced_heredoc(acc, ~s('''), current_line, byte_index_heredoc_start)

    parse_code(t, acc <> ~s('''), replacement, empty_line_replacement)
  end

  defp parse_heredoc(
         <<"\n"::utf8, t::binary>>,
         acc,
         replacement,
         empty_line_replacement,
         here_doc_delimiter,
         current_line,
         byte_index_heredoc_start
       ) do
    # `current_line` is still just "\n" when the line that ends here was empty.
    acc =
      if current_line == "\n" do
        acc <> empty_line_replacement
      else
        acc
      end

    parse_heredoc(
      t,
      acc <> "\n",
      replacement,
      empty_line_replacement,
      here_doc_delimiter,
      "\n",
      byte_index_heredoc_start
    )
  end

  defp parse_heredoc(
         <<_::utf8, t::binary>>,
         acc,
         replacement,
         empty_line_replacement,
         here_doc_delimiter,
         current_line,
         byte_index_heredoc_start
       ) do
    parse_heredoc(
      t,
      acc <> replacement,
      replacement,
      empty_line_replacement,
      here_doc_delimiter,
      current_line <> replacement,
      byte_index_heredoc_start
    )
  end

  #
  # String Literals
  #

  defp parse_string_literal("", acc, _replacement, _empty_line_replacement) do
    acc
  end

  defp parse_string_literal(<<"\\\\"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_string_literal(t, acc <> "\\\\", replacement, empty_line_replacement)
  end

  defp parse_string_literal(<<"\\\""::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_string_literal(t, acc <> "\\\"", replacement, empty_line_replacement)
  end

  defp parse_string_literal(<<"\""::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_code(t, acc <> ~s("), replacement, empty_line_replacement)
  end

  defp parse_string_literal(<<"\n"::utf8, t::binary>>, acc, replacement, empty_line_replacement) do
    parse_string_literal(t, acc <> "\n", replacement, empty_line_replacement)
  end

  defp parse_string_literal(str, acc, replacement, empty_line_replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_string_literal(t, acc <> h, replacement, empty_line_replacement)
  end

  #
  # Helpers
  #

  # Rewrites the part of `acc` that starts at `byte_index_heredoc_start`: the
  # first N characters after every newline are replaced by N spaces, where N is
  # the number of characters collected in `current_line` (the line holding the
  # closing delimiter) minus its leading newline. Lines shorter than N are left
  # untouched because `.` does not match a newline.
  defp pad_replaced_heredoc(acc, _delimiter, current_line, byte_index_heredoc_start) do
    # `current_line` is "" when the heredoc is closed before any newline was
    # seen. Clamp to zero: a negative count makes `String.pad_leading/2` raise
    # and yields an invalid regex quantifier.
    no_of_chars_to_replace = max(String.length(current_line) - 1, 0)
    pad_string = "\n" <> String.pad_leading("", no_of_chars_to_replace)

    start_binary = binary_part(acc, 0, byte_index_heredoc_start)

    new_acc =
      acc
      |> binary_part(byte_index_heredoc_start, byte_size(acc) - byte_index_heredoc_start)
      |> String.replace(~r/\n(.{#{no_of_chars_to_replace}})/, pad_string)

    start_binary <> new_acc
  end
end