charlists.ex (10383B)
defmodule Credo.Code.Charlists do
  @moduledoc """
  This module lets you strip charlists from source code.

  The source is walked once by a small hand-written state machine. Only the
  contents of single-quoted charlist literals are replaced; string literals,
  sigils (all letters, including the charlist sigil), heredocs and comments
  are recognised so that quotes inside them are not mistaken for charlist
  delimiters, and their contents are copied to the output unchanged.
  """

  alias Credo.Code.InterpolationHelper
  alias Credo.SourceFile

  # Opening/closing delimiter pairs recognised for `~s`/`~S` string sigils.
  string_sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"\"", "\""},
    {"'", "'"}
  ]

  # Heredoc delimiters recognised for `~s`/`~S` sigils.
  heredocs_sigil_delimiters = [
    {"'''", "'''"},
    {~s("""), ~s(""")}
  ]

  # `{sigil_start, sigil_end}` pairs such as `{"~s(", ")"}`.
  all_string_sigils =
    Enum.flat_map(string_sigil_delimiters, fn {b, e} ->
      [{"~s#{b}", e}, {"~S#{b}", e}]
    end)

  all_string_sigil_ends = Enum.map(string_sigil_delimiters, &elem(&1, 1))

  all_heredocs_sigils =
    Enum.flat_map(heredocs_sigil_delimiters, fn {b, e} ->
      [{"~s#{b}", e}, {"~S#{b}", e}]
    end)

  # Sigil letters handled by `parse_removable_sigil/4`. `s` is left out:
  # `~s`/`~S` are covered by `all_string_sigils` and `all_heredocs_sigils`.
  alphabet = ~w(a b c d e f g h i j k l m n o p q r t u v w x y z)

  # The heredoc delimiter must stay ahead of the plain `"` delimiter: function
  # clauses are generated in this order and the longer prefix has to win.
  sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"/", "/"},
    {"\"\"\"", "\"\"\""},
    {"\"", "\""},
    {"'", "'"}
  ]

  all_sigil_chars =
    Enum.flat_map(alphabet, fn a ->
      [a, String.upcase(a)]
    end)

  all_sigil_starts = Enum.map(all_sigil_chars, fn c -> "~#{c}" end)

  removable_sigil_ends = Enum.map(sigil_delimiters, &elem(&1, 1))

  # Every `{sigil_start, sigil_end}` combination, delimiter-major so that the
  # ordering of `sigil_delimiters` carries over to the generated clauses.
  removable_sigils =
    for {b, e} <- sigil_delimiters, start <- all_sigil_starts do
      {"#{start}#{b}", e}
    end

  @doc """
  Replaces all characters inside charlists with the equivalent amount of
  white-space.

  Interpolations are replaced first via
  `Credo.Code.InterpolationHelper.replace_interpolations/3`, then the result
  is scanned and every character between the quotes of a charlist literal is
  substituted with `replacement`. Newlines inside a charlist are kept, and
  everything outside of charlists is copied unchanged.

  ## Parameters

    * `source_file` - handed to `Credo.SourceFile.source_and_filename/2` to
      obtain the source text and the filename.
    * `replacement` - string emitted once per replaced character
      (default: a single space).
    * `interpolation_replacement` - forwarded to the interpolation helper
      (default: a single space).
    * `filename` - fallback filename forwarded to
      `Credo.SourceFile.source_and_filename/2` (default: `"nofilename"`).

  Returns the processed source as a binary.
  """
  def replace_with_spaces(
        source_file,
        replacement \\ " ",
        interpolation_replacement \\ " ",
        filename \\ "nofilename"
      ) do
    {source, filename} = SourceFile.source_and_filename(source_file, filename)

    source
    |> InterpolationHelper.replace_interpolations(interpolation_replacement, filename)
    |> parse_code("", replacement)
  end

  #
  # Code (top-level state)
  #

  defp parse_code("", acc, _replacement) do
    acc
  end

  for {sigil_start, sigil_end} <- removable_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_removable_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement
      )
    end
  end

  for {sigil_start, sigil_end} <- all_heredocs_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_heredoc(
        t,
        acc <> unquote(sigil_start),
        replacement,
        unquote(sigil_end)
      )
    end
  end

  defp parse_code(<<"\"\"\""::utf8, t::binary>>, acc, replacement) do
    parse_heredoc(t, acc <> ~s("""), replacement, ~s("""))
  end

  defp parse_code(<<"\'\'\'"::utf8, t::binary>>, acc, replacement) do
    parse_heredoc(t, acc <> ~s('''), replacement, ~s('''))
  end

  for {sigil_start, sigil_end} <- all_string_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_string_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement
      )
    end
  end

  # An escaped single quote does not open a charlist.
  defp parse_code(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\\\'", replacement)
  end

  # `?'` and `?"` are character literals, not opening quotes.
  defp parse_code(<<"?'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "?'", replacement)
  end

  defp parse_code(<<"?\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "?\"", replacement)
  end

  defp parse_code(<<"'"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "'", replacement)
  end

  defp parse_code(<<"#"::utf8, t::binary>>, acc, replacement) do
    parse_comment(t, acc <> "#", replacement)
  end

  defp parse_code(<<"\""::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\"", replacement)
  end

  defp parse_code(<<h::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> <<h::utf8>>, replacement)
  end

  # Fallback for input that does not start with a valid UTF-8 codepoint.
  defp parse_code(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_code(t, acc <> h, replacement)
  end

  #
  # Comments
  #

  defp parse_comment("", acc, _replacement) do
    acc
  end

  # A comment runs until the end of the line.
  defp parse_comment(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\n", replacement)
  end

  defp parse_comment(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_comment(t, acc <> h, replacement)
  end

  #
  # String Literals
  #

  defp parse_string_literal("", acc, _replacement) do
    acc
  end

  # A backslash escapes the next codepoint, so an escaped quote never closes
  # the literal. Both characters are copied verbatim: string contents are not
  # this module's concern and must keep their length.
  defp parse_string_literal(<<"\\"::utf8, rest::binary>>, acc, replacement) do
    case String.next_codepoint(rest) do
      {h, t} -> parse_string_literal(t, acc <> "\\" <> h, replacement)
      # Source ended right after the backslash.
      nil -> acc <> "\\"
    end
  end

  defp parse_string_literal(<<"\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> ~s("), replacement)
  end

  defp parse_string_literal(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\n", replacement)
  end

  defp parse_string_literal(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_string_literal(t, acc <> h, replacement)
  end

  #
  # Charlists
  #

  defp parse_charlist("", acc, _replacement) do
    acc
  end

  # Escape sequences occupy two source characters, so two replacements are
  # emitted to keep the "equivalent amount of white-space" promise.
  defp parse_charlist(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> replacement <> replacement, replacement)
  end

  defp parse_charlist(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> replacement <> replacement, replacement)
  end

  defp parse_charlist(<<"\'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "'", replacement)
  end

  # Newlines are kept so that line numbers stay intact.
  defp parse_charlist(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\n", replacement)
  end

  defp parse_charlist(<<_::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> replacement, replacement)
  end

  # Fallback for bytes that are not valid UTF-8, mirroring `parse_code/3`.
  defp parse_charlist(str, acc, replacement) when is_binary(str) do
    {_h, t} = String.next_codepoint(str)

    parse_charlist(t, acc <> replacement, replacement)
  end

  #
  # Non-String Sigils
  #

  for sigil_end <- removable_sigil_ends do
    defp parse_removable_sigil("", acc, unquote(sigil_end), _replacement) do
      acc
    end

    # A backslash escapes the next codepoint (including the terminator), so
    # the pair is copied as-is and never ends the sigil.
    defp parse_removable_sigil(
           <<"\\"::utf8, s::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      case String.next_codepoint(s) do
        {h, t} ->
          parse_removable_sigil(t, acc <> "\\" <> h, unquote(sigil_end), replacement)

        # Source ended right after the backslash.
        nil ->
          acc <> "\\"
      end
    end

    defp parse_removable_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_code(t, acc <> unquote(sigil_end), replacement)
    end

    defp parse_removable_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_removable_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    end

    defp parse_removable_sigil(
           str,
           acc,
           unquote(sigil_end),
           replacement
         )
         when is_binary(str) do
      {h, t} = String.next_codepoint(str)

      parse_removable_sigil(t, acc <> h, unquote(sigil_end), replacement)
    end
  end

  #
  # Sigils
  #

  for sigil_end <- all_string_sigil_ends do
    defp parse_string_sigil("", acc, unquote(sigil_end), _replacement) do
      acc
    end

    # Escaped characters (including an escaped terminator) are copied
    # verbatim and do not end the sigil.
    defp parse_string_sigil(
           <<"\\"::utf8, rest::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      case String.next_codepoint(rest) do
        {h, t} ->
          parse_string_sigil(t, acc <> "\\" <> h, unquote(sigil_end), replacement)

        # Source ended right after the backslash.
        nil ->
          acc <> "\\"
      end
    end

    defp parse_string_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_code(t, acc <> unquote(sigil_end), replacement)
    end

    defp parse_string_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_string_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    end

    defp parse_string_sigil(
           str,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      {h, t} = String.next_codepoint(str)

      parse_string_sigil(t, acc <> h, unquote(sigil_end), replacement)
    end
  end

  #
  # Heredocs
  #

  # The heredoc only ends on the delimiter it was opened with.
  defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, replacement, "\"\"\"") do
    parse_code(t, acc <> ~s("""), replacement)
  end

  defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, replacement, "\'\'\'") do
    parse_code(t, acc <> ~s('''), replacement)
  end

  defp parse_heredoc("", acc, _replacement, _delimiter) do
    acc
  end

  # Escaped characters are copied verbatim; an escaped quote can therefore
  # not take part in a closing delimiter.
  defp parse_heredoc(<<"\\"::utf8, rest::binary>>, acc, replacement, delimiter) do
    case String.next_codepoint(rest) do
      {h, t} -> parse_heredoc(t, acc <> "\\" <> h, replacement, delimiter)
      # Source ended right after the backslash.
      nil -> acc <> "\\"
    end
  end

  defp parse_heredoc(<<"\n"::utf8, t::binary>>, acc, replacement, delimiter) do
    parse_heredoc(t, acc <> "\n", replacement, delimiter)
  end

  defp parse_heredoc(str, acc, replacement, delimiter) do
    {h, t} = String.next_codepoint(str)

    parse_heredoc(t, acc <> h, replacement, delimiter)
  end
end