zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

strings.ex (12095B)


      1 defmodule Credo.Code.Strings do
      2   @moduledoc """
      3   This module lets you strip strings from source code.
      4   """
      5 
      6   alias Credo.Code.InterpolationHelper
      7   alias Credo.SourceFile
      8 
      9   string_sigil_delimiters = [
     10     {"(", ")"},
     11     {"[", "]"},
     12     {"{", "}"},
     13     {"<", ">"},
     14     {"|", "|"},
     15     {"\"", "\""},
     16     {"'", "'"},
     17     {"/", "/"}
     18   ]
     19 
     20   heredocs_sigil_delimiters = [
     21     {"'''", "'''"},
     22     {~s("""), ~s(""")}
     23   ]
     24 
     25   all_string_sigils =
     26     Enum.flat_map(string_sigil_delimiters, fn {b, e} ->
     27       [{"~s#{b}", e}, {"~S#{b}", e}]
     28     end)
     29 
     30   all_string_sigil_ends = Enum.map(string_sigil_delimiters, &elem(&1, 1))
     31 
     32   all_heredocs_sigils =
     33     Enum.flat_map(heredocs_sigil_delimiters, fn {b, e} ->
     34       [{"~s#{b}", e}, {"~S#{b}", e}]
     35     end)
     36 
     37   alphabet = ~w(a b c d e f g h i j k l m n o p q r t u v w x y z)
     38 
     39   sigil_delimiters = [
     40     {"(", ")"},
     41     {"[", "]"},
     42     {"{", "}"},
     43     {"<", ">"},
     44     {"|", "|"},
     45     {"/", "/"},
     46     {"\"\"\"", "\"\"\""},
     47     {"\"", "\""},
     48     {"'", "'"}
     49   ]
     50 
     51   all_sigil_chars =
     52     Enum.flat_map(alphabet, fn a ->
     53       [a, String.upcase(a)]
     54     end)
     55 
     56   all_sigil_starts = Enum.map(all_sigil_chars, fn c -> "~#{c}" end)
     57 
     58   removable_sigil_ends = Enum.map(sigil_delimiters, &elem(&1, 1))
     59 
     60   removable_sigils =
     61     sigil_delimiters
     62     |> Enum.flat_map(fn {b, e} ->
     63       Enum.flat_map(all_sigil_starts, fn start ->
     64         [{"#{start}#{b}", e}, {"#{start}#{b}", e}]
     65       end)
     66     end)
     67     |> Enum.uniq()
     68 
     69   @doc """
     70   Replaces all characters inside string literals and string sigils
     71   with the equivalent amount of white-space.
     72   """
     73   def replace_with_spaces(
     74         source_file,
     75         replacement \\ " ",
     76         interpolation_replacement \\ " ",
     77         filename \\ "nofilename"
     78       ) do
     79     {source, filename} = SourceFile.source_and_filename(source_file, filename)
     80 
     81     source
     82     |> InterpolationHelper.replace_interpolations(interpolation_replacement, filename)
     83     |> parse_code("", replacement)
     84   end
     85 
     86   defp parse_code("", acc, _replacement) do
     87     acc
     88   end
     89 
     90   for {sigil_start, sigil_end} <- removable_sigils do
     91     defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
     92       parse_removable_sigil(
     93         t,
     94         acc <> unquote(sigil_start),
     95         unquote(sigil_end),
     96         replacement
     97       )
     98     end
     99   end
    100 
    101   for {sigil_start, sigil_end} <- all_heredocs_sigils do
    102     defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
    103       parse_heredoc(
    104         t,
    105         acc <> unquote(sigil_start),
    106         "",
    107         replacement,
    108         unquote(sigil_end)
    109       )
    110     end
    111   end
    112 
    113   defp parse_code(<<"\"\"\""::utf8, t::binary>>, acc, replacement) do
    114     parse_heredoc(t, acc <> ~s("""), "", replacement, ~s("""))
    115   end
    116 
    117   defp parse_code(<<"\'\'\'"::utf8, t::binary>>, acc, replacement) do
    118     parse_heredoc(t, acc <> ~s('''), "", replacement, ~s('''))
    119   end
    120 
    121   for {sigil_start, sigil_end} <- all_string_sigils do
    122     defp parse_code(<<unquote(sigil_start)::utf8, t::binary>>, acc, replacement) do
    123       parse_string_sigil(
    124         t,
    125         acc <> unquote(sigil_start),
    126         unquote(sigil_end),
    127         replacement
    128       )
    129     end
    130   end
    131 
    132   defp parse_code(<<"\\\""::utf8, t::binary>>, acc, replacement) do
    133     parse_code(t, acc <> "\\\"", replacement)
    134   end
    135 
    136   defp parse_code(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    137     parse_code(t, acc <> "\\\'", replacement)
    138   end
    139 
    140   defp parse_code(<<"?'"::utf8, t::binary>>, acc, replacement) do
    141     parse_code(t, acc <> "?'", replacement)
    142   end
    143 
    144   defp parse_code(<<"'"::utf8, t::binary>>, acc, replacement) do
    145     parse_charlist(t, acc <> "'", replacement)
    146   end
    147 
    148   defp parse_code(<<"?\""::utf8, t::binary>>, acc, replacement) do
    149     parse_code(t, acc <> "?\"", replacement)
    150   end
    151 
    152   defp parse_code(<<"#"::utf8, t::binary>>, acc, replacement) do
    153     parse_comment(t, acc <> "#", replacement)
    154   end
    155 
    156   defp parse_code(<<"\""::utf8, t::binary>>, acc, replacement) do
    157     parse_string_literal(t, acc <> "\"", replacement)
    158   end
    159 
    160   defp parse_code(<<h::utf8, t::binary>>, acc, replacement) do
    161     parse_code(t, acc <> <<h::utf8>>, replacement)
    162   end
    163 
    164   defp parse_code(str, acc, replacement) when is_binary(str) do
    165     {h, t} = String.next_codepoint(str)
    166 
    167     parse_code(t, acc <> h, replacement)
    168   end
    169 
    170   #
    171   # Charlists
    172   #
    173 
    174   defp parse_charlist("", acc, _replacement) do
    175     acc
    176   end
    177 
    178   defp parse_charlist(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    179     parse_charlist(t, acc <> "\\\\", replacement)
    180   end
    181 
    182   defp parse_charlist(<<"\\\'"::utf8, t::binary>>, acc, replacement) do
    183     parse_charlist(t, acc <> "\\\'", replacement)
    184   end
    185 
    186   defp parse_charlist(<<"\'"::utf8, t::binary>>, acc, replacement) do
    187     parse_code(t, acc <> "'", replacement)
    188   end
    189 
    190   defp parse_charlist(<<"\n"::utf8, t::binary>>, acc, replacement) do
    191     parse_charlist(t, acc <> "\n", replacement)
    192   end
    193 
    194   defp parse_charlist(str, acc, replacement) when is_binary(str) do
    195     {h, t} = String.next_codepoint(str)
    196 
    197     parse_charlist(t, acc <> h, replacement)
    198   end
    199 
    200   #
    201   # Comments
    202   #
    203 
    204   defp parse_comment("", acc, _replacement) do
    205     acc
    206   end
    207 
    208   defp parse_comment(<<"\n"::utf8, t::binary>>, acc, replacement) do
    209     parse_code(t, acc <> "\n", replacement)
    210   end
    211 
    212   defp parse_comment(str, acc, replacement) when is_binary(str) do
    213     {h, t} = String.next_codepoint(str)
    214 
    215     parse_comment(t, acc <> h, replacement)
    216   end
    217 
    218   #
    219   # String Literals
    220   #
    221 
    222   defp parse_string_literal("", acc, _replacement) do
    223     acc
    224   end
    225 
    226   defp parse_string_literal(<<"\\\\"::utf8, t::binary>>, acc, replacement) do
    227     parse_string_literal(t, acc, replacement)
    228   end
    229 
    230   defp parse_string_literal(<<"\\\""::utf8, t::binary>>, acc, replacement) do
    231     parse_string_literal(t, acc, replacement)
    232   end
    233 
    234   defp parse_string_literal(<<"\""::utf8, t::binary>>, acc, replacement) do
    235     parse_code(t, acc <> ~s("), replacement)
    236   end
    237 
    238   defp parse_string_literal(<<"\n"::utf8, t::binary>>, acc, replacement) do
    239     parse_string_literal(t, acc <> "\n", replacement)
    240   end
    241 
    242   defp parse_string_literal(<<_::utf8, t::binary>>, acc, replacement) do
    243     parse_string_literal(t, acc <> replacement, replacement)
    244   end
    245 
    246   #
    247   # Non-String Sigils
    248   #
    249 
    250   for sigil_end <- removable_sigil_ends do
    251     defp parse_removable_sigil("", acc, unquote(sigil_end), _replacement) do
    252       acc
    253     end
    254 
    255     defp parse_removable_sigil(
    256            <<"\\"::utf8, s::binary>>,
    257            acc,
    258            unquote(sigil_end),
    259            replacement
    260          ) do
    261       {h, t} = String.next_codepoint(s)
    262 
    263       parse_removable_sigil(t, acc <> "\\" <> h, unquote(sigil_end), replacement)
    264     end
    265 
    266     defp parse_removable_sigil(
    267            # \\
    268            <<"\\\\"::utf8, t::binary>>,
    269            acc,
    270            unquote(sigil_end),
    271            replacement
    272          ) do
    273       parse_removable_sigil(t, acc <> "\\\\", unquote(sigil_end), replacement)
    274     end
    275 
    276     defp parse_removable_sigil(
    277            <<unquote(sigil_end)::utf8, t::binary>>,
    278            acc,
    279            unquote(sigil_end),
    280            replacement
    281          ) do
    282       parse_code(t, acc <> unquote(sigil_end), replacement)
    283     end
    284 
    285     defp parse_removable_sigil(
    286            <<unquote("\\#{sigil_end}")::utf8, t::binary>>,
    287            acc,
    288            unquote(sigil_end),
    289            replacement
    290          ) do
    291       parse_removable_sigil(
    292         t,
    293         acc <> unquote("\\#{sigil_end}"),
    294         unquote(sigil_end),
    295         replacement
    296       )
    297     end
    298 
    299     if sigil_end != "\"" do
    300       defp parse_removable_sigil(
    301              <<"\""::utf8, t::binary>>,
    302              acc,
    303              unquote(sigil_end),
    304              replacement
    305            ) do
    306         parse_removable_sigil(t, acc <> "\"", unquote(sigil_end), replacement)
    307       end
    308     end
    309 
    310     defp parse_removable_sigil(
    311            <<"\n"::utf8, t::binary>>,
    312            acc,
    313            unquote(sigil_end),
    314            replacement
    315          ) do
    316       parse_removable_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    317     end
    318 
    319     defp parse_removable_sigil(
    320            str,
    321            acc,
    322            unquote(sigil_end),
    323            replacement
    324          )
    325          when is_binary(str) do
    326       {h, t} = String.next_codepoint(str)
    327 
    328       parse_removable_sigil(t, acc <> h, unquote(sigil_end), replacement)
    329     end
    330   end
    331 
    332   #
    333   # Sigils
    334   #
    335 
    336   for sigil_end <- all_string_sigil_ends do
    337     defp parse_string_sigil("", acc, unquote(sigil_end), _replacement) do
    338       acc
    339     end
    340 
    341     defp parse_string_sigil(
    342            <<"\\\\"::utf8, t::binary>>,
    343            acc,
    344            unquote(sigil_end),
    345            replacement
    346          ) do
    347       parse_string_sigil(t, acc <> replacement <> replacement, unquote(sigil_end), replacement)
    348     end
    349 
    350     defp parse_string_sigil(
    351            <<"\\\""::utf8, t::binary>>,
    352            acc,
    353            unquote(sigil_end),
    354            replacement
    355          ) do
    356       parse_string_sigil(t, acc <> replacement <> replacement, unquote(sigil_end), replacement)
    357     end
    358 
    359     defp parse_string_sigil(
    360            <<unquote(sigil_end)::utf8, t::binary>>,
    361            acc,
    362            unquote(sigil_end),
    363            replacement
    364          ) do
    365       parse_code(t, acc <> unquote(sigil_end), replacement)
    366     end
    367 
    368     defp parse_string_sigil(
    369            <<"\n"::utf8, t::binary>>,
    370            acc,
    371            unquote(sigil_end),
    372            replacement
    373          ) do
    374       parse_string_sigil(t, acc <> "\n", unquote(sigil_end), replacement)
    375     end
    376 
    377     defp parse_string_sigil(
    378            <<_::utf8, t::binary>>,
    379            acc,
    380            unquote(sigil_end),
    381            replacement
    382          ) do
    383       parse_string_sigil(t, acc <> replacement, unquote(sigil_end), replacement)
    384     end
    385   end
    386 
    387   #
    388   # Heredocs
    389   #
    390 
    391   defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, heredoc_acc, "" = replacement, "\"\"\"") do
    392     parse_code(t, acc <> heredoc_acc <> "\"\"\"", replacement)
    393   end
    394 
    395   defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, heredoc_acc, " " = replacement, "\"\"\"") do
    396     parse_code(t, acc <> heredoc_acc <> "\"\"\"", replacement)
    397   end
    398 
    399   defp parse_heredoc(<<"\"\"\""::utf8, t::binary>>, acc, heredoc_acc, replacement, "\"\"\"") do
    400     heredoc_acc = heredoc_acc <> "\"\"\""
    401 
    402     heredoc_acc =
    403       case Regex.run(~r/\n([#{replacement}]+)\"\"\"\z/m, heredoc_acc) do
    404         [_, indent_string] ->
    405           x = String.length(indent_string)
    406           Regex.replace(~r/^(.{#{x}})/m, heredoc_acc, String.pad_trailing("", x))
    407 
    408         _ ->
    409           heredoc_acc
    410       end
    411 
    412     parse_code(t, acc <> heredoc_acc, replacement)
    413   end
    414 
    415   defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, heredoc_acc, "" = replacement, "\'\'\'") do
    416     parse_code(t, acc <> heredoc_acc <> "\'\'\'", replacement)
    417   end
    418 
    419   defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, heredoc_acc, " " = replacement, "\'\'\'") do
    420     parse_code(t, acc <> heredoc_acc <> "\'\'\'", replacement)
    421   end
    422 
    423   defp parse_heredoc(<<"\'\'\'"::utf8, t::binary>>, acc, heredoc_acc, replacement, "\'\'\'") do
    424     heredoc_acc = heredoc_acc <> "\'\'\'"
    425 
    426     heredoc_acc =
    427       case Regex.run(~r/\n([#{replacement}]+)\'\'\'\z/m, heredoc_acc) do
    428         [_, indent_string] ->
    429           x = String.length(indent_string)
    430           Regex.replace(~r/^(.{#{x}})/m, heredoc_acc, String.pad_trailing("", x))
    431 
    432         _ ->
    433           heredoc_acc
    434       end
    435 
    436     parse_code(t, acc <> heredoc_acc, replacement)
    437   end
    438 
    439   defp parse_heredoc("", acc, _heredoc_acc, _replacement, _delimiter) do
    440     acc
    441   end
    442 
    443   defp parse_heredoc(<<"\\\\"::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    444     parse_heredoc(t, acc, heredoc_acc, replacement, delimiter)
    445   end
    446 
    447   defp parse_heredoc(<<"\\\""::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    448     parse_heredoc(t, acc, heredoc_acc, replacement, delimiter)
    449   end
    450 
    451   defp parse_heredoc(<<"\n"::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    452     parse_heredoc(t, acc, heredoc_acc <> "\n", replacement, delimiter)
    453   end
    454 
    455   defp parse_heredoc(<<_::utf8, t::binary>>, acc, heredoc_acc, replacement, delimiter) do
    456     parse_heredoc(t, acc, heredoc_acc <> replacement, replacement, delimiter)
    457   end
    458 end