strings.ex (12095B)
defmodule Credo.Code.Strings do
  @moduledoc """
  This module lets you strip strings from source code.
  """

  alias Credo.Code.InterpolationHelper
  alias Credo.SourceFile

  # Everything below up to the first `@doc` is evaluated at compile time and
  # only serves to generate function clauses via `for ... do defp ... end`.

  # Opening/closing delimiter pairs accepted for `~s`/`~S` string sigils.
  string_sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"\"", "\""},
    {"'", "'"},
    {"/", "/"}
  ]

  # Delimiter pairs for heredoc-style `~s`/`~S` sigils.
  heredocs_sigil_delimiters = [
    {"'''", "'''"},
    {~s("""), ~s(""")}
  ]

  all_string_sigils =
    Enum.flat_map(string_sigil_delimiters, fn {b, e} ->
      [{"~s#{b}", e}, {"~S#{b}", e}]
    end)

  all_string_sigil_ends = Enum.map(string_sigil_delimiters, &elem(&1, 1))

  all_heredocs_sigils =
    Enum.flat_map(heredocs_sigil_delimiters, fn {b, e} ->
      [{"~s#{b}", e}, {"~S#{b}", e}]
    end)

  # "s" is deliberately absent: `~s`/`~S` are covered by the string sigil
  # tables above, so every letter listed here denotes a non-string sigil.
  alphabet = ~w(a b c d e f g h i j k l m n o p q r t u v w x y z)

  # Order matters: the triple-quote delimiter must precede the single `"` so
  # that the generated `parse_code/3` clause for e.g. `~r"""` is tried before
  # the one for `~r"`.
  sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"/", "/"},
    {"\"\"\"", "\"\"\""},
    {"\"", "\""},
    {"'", "'"}
  ]

  all_sigil_chars =
    Enum.flat_map(alphabet, fn a ->
      [a, String.upcase(a)]
    end)

  all_sigil_starts = Enum.map(all_sigil_chars, fn c -> "~#{c}" end)

  removable_sigil_ends = Enum.map(sigil_delimiters, &elem(&1, 1))

  # `{sigil_start, sigil_end}` for every non-string sigil, delimiter-major so
  # that the delimiter ordering above is preserved in the generated clauses.
  removable_sigils =
    for {b, e} <- sigil_delimiters, start <- all_sigil_starts do
      {"#{start}#{b}", e}
    end

  @doc """
  Replaces all characters inside string literals and string sigils
  with the equivalent amount of white-space.

  Each character inside a double-quoted string, a `~s`/`~S` sigil or a
  heredoc is swapped for `replacement`; newlines are kept. Charlists,
  comments and non-string sigils are copied through unchanged.

  Interpolations are first processed by
  `InterpolationHelper.replace_interpolations/3` using
  `interpolation_replacement`.

  `source_file` and `filename` are resolved to the actual source text via
  `SourceFile.source_and_filename/2`.
  """
  def replace_with_spaces(
        source_file,
        replacement \\ " ",
        interpolation_replacement \\ " ",
        filename \\ "nofilename"
      ) do
    {source, filename} = SourceFile.source_and_filename(source_file, filename)

    source
    |> InterpolationHelper.replace_interpolations(interpolation_replacement, filename)
    |> parse_code("", replacement)
  end

  #
  # Code (top-level state)
  #
  # Clause order is significant: longer prefixes (sigils, heredocs) have to be
  # tried before the single-character fallbacks at the bottom.
  #

  defp parse_code("", acc, _replacement) do
    acc
  end

  for {sigil_start, sigil_end} <- removable_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_removable_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement
      )
    end
  end

  for {sigil_start, sigil_end} <- all_heredocs_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_heredoc(
        t,
        acc <> unquote(sigil_start),
        "",
        replacement,
        unquote(sigil_end)
      )
    end
  end

  defp parse_code(<<"\"\"\""::utf8, t::binary>>, acc, replacement) do
    parse_heredoc(t, acc <> ~s("""), "", replacement, ~s("""))
  end

  defp parse_code(<<"\'\'\'"::utf8, t::binary>>, acc, replacement) do
    parse_heredoc(t, acc <> ~s('''), "", replacement, ~s('''))
  end

  for {sigil_start, sigil_end} <- all_string_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_string_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement
      )
    end
  end

  # Escaped quotes and the character literals `?'` / `?"` must not open a
  # charlist or string, so they are copied through as-is.
  defp parse_code(<<"\\\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\\\"", replacement)
  end

  defp parse_code(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\\\'", replacement)
  end

  defp parse_code(<<"?'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "?'", replacement)
  end

  defp parse_code(<<"'"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "'", replacement)
  end

  defp parse_code(<<"?\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "?\"", replacement)
  end

  defp parse_code(<<"#"::utf8, t::binary>>, acc, replacement) do
    parse_comment(t, acc <> "#", replacement)
  end

  defp parse_code(<<"\""::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\"", replacement)
  end

  defp parse_code(<<h::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> <<h::utf8>>, replacement)
  end

  # Reached only when the head of `str` is not a valid UTF-8 codepoint.
  defp parse_code(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_code(t, acc <> h, replacement)
  end

  #
  # Charlists (copied verbatim, never replaced)
  #

  defp parse_charlist("", acc, _replacement) do
    acc
  end

  defp parse_charlist(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\\\\", replacement)
  end

  defp parse_charlist(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\\\'", replacement)
  end

  defp parse_charlist(<<"\'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "'", replacement)
  end

  defp parse_charlist(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\n", replacement)
  end

  defp parse_charlist(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_charlist(t, acc <> h, replacement)
  end

  #
  # Comments (copied verbatim up to and including the newline)
  #

  defp parse_comment("", acc, _replacement) do
    acc
  end

  defp parse_comment(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\n", replacement)
  end

  defp parse_comment(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_comment(t, acc <> h, replacement)
  end

  #
  # String Literals
  #

  defp parse_string_literal("", acc, _replacement) do
    acc
  end

  # NOTE: escaped backslashes and escaped quotes are dropped entirely (nothing
  # is appended for them), so a literal containing them comes out shorter than
  # it went in. Kept as-is because callers may rely on the current output.
  defp parse_string_literal(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc, replacement)
  end

  defp parse_string_literal(<<"\\\""::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc, replacement)
  end

  defp parse_string_literal(<<"\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> ~s("), replacement)
  end

  defp parse_string_literal(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\n", replacement)
  end

  defp parse_string_literal(<<_::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> replacement, replacement)
  end

  #
  # Non-String Sigils (copied verbatim until the closing delimiter)
  #

  for sigil_end <- removable_sigil_ends do
    defp parse_removable_sigil("", acc, unquote(sigil_end), _replacement) do
      acc
    end

    # The source ends right after a backslash: there is no following
    # codepoint to pair it with (`String.next_codepoint("")` returns `nil`),
    # so keep the backslash and stop instead of raising a `MatchError`.
    defp parse_removable_sigil("\\", acc, unquote(sigil_end), _replacement) do
      acc <> "\\"
    end

    # A backslash always takes the next codepoint with it. This covers `\\\\`
    # and an escaped closing delimiter alike, so no dedicated clauses are
    # needed for those.
    defp parse_removable_sigil(
           <<"\\"::utf8, s::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      {h, t} = String.next_codepoint(s)

      parse_removable_sigil(t, acc <> "\\" <> h, unquote(sigil_end), replacement)
    end

    defp parse_removable_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_code(t, acc <> unquote(sigil_end), replacement)
    end

    # When `"` is itself the closing delimiter the clause above already
    # handles it; generating this one as well would only add a dead clause.
    if sigil_end != "\"" do
      defp parse_removable_sigil(
             <<"\""::utf8, t::binary>>,
             acc,
             unquote(sigil_end),
             replacement
           ) do
        parse_removable_sigil(t, acc <> "\"", unquote(sigil_end), replacement)
      end
    end

    defp parse_removable_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_removable_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    end

    defp parse_removable_sigil(
           str,
           acc,
           unquote(sigil_end),
           replacement
         )
         when is_binary(str) do
      {h, t} = String.next_codepoint(str)

      parse_removable_sigil(t, acc <> h, unquote(sigil_end), replacement)
    end
  end

  #
  # String Sigils
  #

  for sigil_end <- all_string_sigil_ends do
    defp parse_string_sigil("", acc, unquote(sigil_end), _replacement) do
      acc
    end

    # Two-character escapes are replaced by two replacements.
    defp parse_string_sigil(
           <<"\\\\"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_string_sigil(t, acc <> replacement <> replacement, unquote(sigil_end), replacement)
    end

    defp parse_string_sigil(
           <<"\\\""::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_string_sigil(t, acc <> replacement <> replacement, unquote(sigil_end), replacement)
    end

    defp parse_string_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_code(t, acc <> unquote(sigil_end), replacement)
    end

    defp parse_string_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_string_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    end

    defp parse_string_sigil(
           <<_::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_string_sigil(t, acc <> replacement, unquote(sigil_end), replacement)
    end
  end

  #
  # Heredocs
  #
  # The heredoc body is collected in `heredoc_acc` and only appended to `acc`
  # once the closing delimiter is found.
  #

  # With an empty or single-space replacement the collected body is emitted
  # unchanged.
  defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, heredoc_acc, replacement, "\"\"\"")
       when replacement in ["", " "] do
    parse_code(t, acc <> heredoc_acc <> "\"\"\"", replacement)
  end

  # For any other replacement: if the closing delimiter is preceded on its
  # line only by replacement characters, the same number of leading characters
  # on every line of the body is overwritten with spaces.
  # NOTE(review): `replacement` is interpolated into a regex character class
  # unescaped; replacements containing regex metacharacters are presumably not
  # expected here - confirm against callers.
  defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, heredoc_acc, replacement, "\"\"\"") do
    heredoc_acc = heredoc_acc <> "\"\"\""

    heredoc_acc =
      case Regex.run(~r/\n([#{replacement}]+)\"\"\"\z/m, heredoc_acc) do
        [_, indent_string] ->
          x = String.length(indent_string)
          Regex.replace(~r/^(.{#{x}})/m, heredoc_acc, String.pad_trailing("", x))

        _ ->
          heredoc_acc
      end

    parse_code(t, acc <> heredoc_acc, replacement)
  end

  defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, heredoc_acc, replacement, "\'\'\'")
       when replacement in ["", " "] do
    parse_code(t, acc <> heredoc_acc <> "\'\'\'", replacement)
  end

  defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, heredoc_acc, replacement, "\'\'\'") do
    heredoc_acc = heredoc_acc <> "\'\'\'"

    heredoc_acc =
      case Regex.run(~r/\n([#{replacement}]+)\'\'\'\z/m, heredoc_acc) do
        [_, indent_string] ->
          x = String.length(indent_string)
          Regex.replace(~r/^(.{#{x}})/m, heredoc_acc, String.pad_trailing("", x))

        _ ->
          heredoc_acc
      end

    parse_code(t, acc <> heredoc_acc, replacement)
  end

  # Unterminated heredoc: the body collected so far is discarded.
  defp parse_heredoc("", acc, _heredoc_acc, _replacement, _delimiter) do
    acc
  end

  # Escaped backslashes and escaped double quotes are dropped (nothing is
  # appended for them).
  defp parse_heredoc(<<"\\\\"::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    parse_heredoc(t, acc, heredoc_acc, replacement, delimiter)
  end

  defp parse_heredoc(<<"\\\""::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    parse_heredoc(t, acc, heredoc_acc, replacement, delimiter)
  end

  defp parse_heredoc(<<"\n"::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    parse_heredoc(t, acc, heredoc_acc <> "\n", replacement, delimiter)
  end

  defp parse_heredoc(<<_::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    parse_heredoc(t, acc, heredoc_acc <> replacement, replacement, delimiter)
  end
end