sigils.ex (9219B)
defmodule Credo.Code.Sigils do
  @moduledoc """
  This module lets you strip sigils from source code.
  """

  alias Credo.Code.InterpolationHelper
  alias Credo.SourceFile

  # Delimiter pairs accepted for single-line `~s`/`~S` sigils.
  string_sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"\"", "\""},
    {"'", "'"},
    {"/", "/"}
  ]

  # Delimiter pairs accepted for heredoc-style `~s`/`~S` sigils.
  heredocs_sigil_delimiters = [
    {"'''", "'''"},
    {~s("""), ~s(""")}
  ]

  # `{sigil_start, sigil_end}` pairs for `~s`/`~S` with single-line delimiters.
  all_string_sigils =
    Enum.flat_map(string_sigil_delimiters, fn {b, e} ->
      [{"~s#{b}", e}, {"~S#{b}", e}]
    end)

  # `{sigil_start, sigil_end}` pairs for `~s`/`~S` with heredoc delimiters.
  all_heredocs_sigils =
    Enum.flat_map(heredocs_sigil_delimiters, fn {b, e} ->
      [{"~s#{b}", e}, {"~S#{b}", e}]
    end)

  # Every lowercase letter except "s": `~s`/`~S` get their own clauses below
  # (see `all_string_sigils` and `all_heredocs_sigils`).
  alphabet = ~w(a b c d e f g h i j k l m n o p q r t u v w x y z)

  # NOTE: `"\"\"\""` must stay ahead of `"\""` so that the generated clause for
  # e.g. `~r"""` is tried before the one for `~r"`.
  sigil_delimiters = [
    {"(", ")"},
    {"[", "]"},
    {"{", "}"},
    {"<", ">"},
    {"|", "|"},
    {"/", "/"},
    {"\"\"\"", "\"\"\""},
    {"\"", "\""},
    {"'", "'"}
  ]

  all_sigil_chars =
    Enum.flat_map(alphabet, fn a ->
      [a, String.upcase(a)]
    end)

  all_sigil_starts = Enum.map(all_sigil_chars, fn c -> "~#{c}" end)

  removable_sigil_ends = Enum.map(sigil_delimiters, &elem(&1, 1))

  # One `{sigil_start, sigil_end}` pair per delimiter/sigil-letter combination,
  # ordered by delimiter first (the clause order below depends on it).
  removable_sigils =
    for {b, e} <- sigil_delimiters, start <- all_sigil_starts do
      {"#{start}#{b}", e}
    end

  @doc """
  Replaces all characters inside all sigils with the equivalent amount of
  white-space.

  The sigil start (e.g. `~r/`) and its closing delimiter are kept, every
  codepoint in between is replaced by `replacement`, and newlines are
  preserved so that line numbers stay stable. The content of `~s`/`~S`
  heredocs (`\"\"\"` / `'''`) is copied unchanged.

  ## Parameters

    * `source_file` - handed to `Credo.SourceFile.source_and_filename/2`
      together with `filename` to obtain the source to process
    * `replacement` - emitted once for every replaced codepoint
      (defaults to `" "`)
    * `interpolation_replacement` - forwarded to
      `Credo.Code.InterpolationHelper.replace_interpolations/3`, which runs
      on the source before sigils are processed (defaults to `" "`)
    * `filename` - see `source_file` (defaults to `"nofilename"`)

  Returns the processed source as a binary.
  """
  def replace_with_spaces(
        source_file,
        replacement \\ " ",
        interpolation_replacement \\ " ",
        filename \\ "nofilename"
      ) do
    {source, filename} = SourceFile.source_and_filename(source_file, filename)

    source
    |> InterpolationHelper.replace_interpolations(interpolation_replacement, filename)
    |> parse_code("", replacement)
  end

  #
  # Code
  #
  # `parse_code/3` is the top-level state: it copies input to `acc` unchanged
  # and hands over to one of the specialised parsers below when a comment,
  # charlist, string, heredoc or sigil starts. Clause order is significant.
  #

  defp parse_code("", acc, _replacement) do
    acc
  end

  # Escaped quotes outside of a literal are copied as a pair so that the quote
  # is not mistaken for the start of a string or charlist.
  defp parse_code(<<"\\\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\\\"", replacement)
  end

  defp parse_code(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\\\'", replacement)
  end

  # `?'` is copied as a pair so that the quote does not open a charlist.
  defp parse_code(<<"?'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "?'", replacement)
  end

  # This clause has to precede the single `'` clause below, otherwise it can
  # never match and `'''` heredocs are parsed as a sequence of charlists.
  defp parse_code(<<"\'\'\'"::utf8, t::binary>>, acc, replacement) do
    parse_heredoc(t, acc <> ~s('''), replacement, ~s('''))
  end

  defp parse_code(<<"'"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "'", replacement)
  end

  # `?"` is copied as a pair so that the quote does not open a string.
  defp parse_code(<<"?\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "?\"", replacement)
  end

  defp parse_code(<<"#"::utf8, t::binary>>, acc, replacement) do
    parse_comment(t, acc <> "#", replacement)
  end

  # All sigils except `~s`/`~S`: keep the sigil start, blank out the content.
  for {sigil_start, sigil_end} <- removable_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_removable_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement
      )
    end
  end

  # `~s`/`~S` heredocs: content is copied unchanged by `parse_heredoc/4`.
  # These clauses must precede the `all_string_sigils` clauses, since e.g.
  # `~s"` is a prefix of `~s"""`.
  for {sigil_start, sigil_end} <- all_heredocs_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_heredoc(
        t,
        acc <> unquote(sigil_start),
        replacement,
        unquote(sigil_end)
      )
    end
  end

  defp parse_code(<<"\"\"\""::utf8, t::binary>>, acc, replacement) do
    parse_heredoc(t, acc <> ~s("""), replacement, ~s("""))
  end

  # Single-line `~s`/`~S` sigils: content is blanked out.
  for {sigil_start, sigil_end} <- all_string_sigils do
    defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
      parse_removable_sigil(
        t,
        acc <> unquote(sigil_start),
        unquote(sigil_end),
        replacement
      )
    end
  end

  defp parse_code(<<"\""::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\"", replacement)
  end

  defp parse_code(<<h::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> <<h::utf8>>, replacement)
  end

  # Fallback for input that does not start with a valid UTF-8 codepoint.
  defp parse_code(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_code(t, acc <> h, replacement)
  end

  #
  # Charlists
  #
  # Content is copied unchanged up to the closing `'`.
  #

  defp parse_charlist("", acc, _replacement) do
    acc
  end

  defp parse_charlist(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\\\\", replacement)
  end

  defp parse_charlist(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\\\'", replacement)
  end

  defp parse_charlist(<<"\'"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "'", replacement)
  end

  defp parse_charlist(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_charlist(t, acc <> "\n", replacement)
  end

  defp parse_charlist(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_charlist(t, acc <> h, replacement)
  end

  #
  # Comments
  #
  # Content is copied unchanged up to (and including) the end of the line.
  #

  defp parse_comment("", acc, _replacement) do
    acc
  end

  defp parse_comment(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> "\n", replacement)
  end

  defp parse_comment(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)

    parse_comment(t, acc <> h, replacement)
  end

  #
  # String Literals
  #
  # Content is copied unchanged up to the closing `"`.
  #

  defp parse_string_literal("", acc, _replacement) do
    acc
  end

  defp parse_string_literal(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\\\\", replacement)
  end

  defp parse_string_literal(<<"\\\""::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\\\"", replacement)
  end

  defp parse_string_literal(<<"\""::utf8, t::binary>>, acc, replacement) do
    parse_code(t, acc <> ~s("), replacement)
  end

  defp parse_string_literal(<<"\n"::utf8, t::binary>>, acc, replacement) do
    parse_string_literal(t, acc <> "\n", replacement)
  end

  defp parse_string_literal(str, acc, replacement) when is_binary(str) do
    {h, t} = String.next_codepoint(str)
    parse_string_literal(t, acc <> h, replacement)
  end

  #
  # Sigils
  #
  # One set of clauses is generated per closing delimiter. Every codepoint up
  # to the closing delimiter is replaced by `replacement`; newlines are kept.
  #

  for sigil_end <- removable_sigil_ends do
    defp parse_removable_sigil("", acc, unquote(sigil_end), _replacement) do
      acc
    end

    # A lone backslash at the very end of the input: there is no escaped
    # codepoint to consume, so only the backslash itself is replaced.
    defp parse_removable_sigil("\\", acc, unquote(sigil_end), replacement) do
      acc <> replacement
    end

    # An escaped newline: the backslash is replaced, but the line break is
    # kept so that the output has the same number of lines as the input.
    defp parse_removable_sigil(
           <<"\\\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_removable_sigil(t, acc <> replacement <> "\n", unquote(sigil_end), replacement)
    end

    # Any other escape sequence (including an escaped backslash or an escaped
    # closing delimiter): the backslash and the following codepoint are
    # replaced together, so the escaped codepoint cannot close the sigil.
    defp parse_removable_sigil(
           <<"\\"::utf8, s::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      {_h, t} = String.next_codepoint(s)

      parse_removable_sigil(t, acc <> replacement <> replacement, unquote(sigil_end), replacement)
    end

    # Closing delimiter: keep it and return to regular code.
    defp parse_removable_sigil(
           <<unquote(sigil_end)::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_code(t, acc <> unquote(sigil_end), replacement)
    end

    if sigil_end != "\"" do
      defp parse_removable_sigil(
             <<"\""::utf8, t::binary>>,
             acc,
             unquote(sigil_end),
             replacement
           ) do
        parse_removable_sigil(t, acc <> replacement, unquote(sigil_end), replacement)
      end
    end

    defp parse_removable_sigil(
           <<"\n"::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_removable_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    end

    defp parse_removable_sigil(
           <<_::utf8, t::binary>>,
           acc,
           unquote(sigil_end),
           replacement
         ) do
      parse_removable_sigil(
        t,
        acc <> replacement,
        unquote(sigil_end),
        replacement
      )
    end

    # Fallback for input that does not start with a valid UTF-8 codepoint,
    # mirroring the fallback clause of `parse_code/3`. `str` cannot be empty
    # here because the empty binary is handled by the first clause.
    defp parse_removable_sigil(str, acc, unquote(sigil_end), replacement)
         when is_binary(str) do
      {_h, t} = String.next_codepoint(str)

      parse_removable_sigil(t, acc <> replacement, unquote(sigil_end), replacement)
    end
  end

  #
  # Heredocs
  #
  # Content is copied unchanged up to the closing delimiter, which has to be
  # the same kind of delimiter that opened the heredoc.
  #

  defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, replacement, "\"\"\"") do
    parse_code(t, acc <> "\"\"\"", replacement)
  end

  defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, replacement, "\'\'\'") do
    parse_code(t, acc <> "\'\'\'", replacement)
  end

  defp parse_heredoc("", acc, _replacement, _delimiter) do
    acc
  end

  defp parse_heredoc(<<"\\\\"::utf8, t::binary>>, acc, replacement, delimiter) do
    parse_heredoc(t, acc <> "\\\\", replacement, delimiter)
  end

  defp parse_heredoc(<<"\\\""::utf8, t::binary>>, acc, replacement, delimiter) do
    parse_heredoc(t, acc <> "\\\"", replacement, delimiter)
  end

  # Counterpart of the escaped `"` clause above, so that an escaped `'` cannot
  # be taken for the first quote of a closing `'''`.
  defp parse_heredoc(<<"\\\'"::utf8, t::binary>>, acc, replacement, delimiter) do
    parse_heredoc(t, acc <> "\\\'", replacement, delimiter)
  end

  defp parse_heredoc(<<"\n"::utf8, t::binary>>, acc, replacement, delimiter) do
    parse_heredoc(t, acc <> "\n", replacement, delimiter)
  end

  defp parse_heredoc(str, acc, replacement, delimiter) when is_binary(str) do
    {h, t} = String.next_codepoint(str)
    parse_heredoc(t, acc <> h, replacement, delimiter)
  end
end