zf

zenflows testing
git clone https://s.sonu.ch/~srfsh/zf.git
Log | Files | Refs | Submodules | README | LICENSE

duplicated_code.ex (8636B)


      1 defmodule Credo.Check.Design.DuplicatedCode do
  # Declares this module as a Credo check. Every option below is consumed by
  # the `Credo.Check` macro.
  # NOTE(review): the exact meaning of `base_priority` and `tags` is defined by
  # Credo, not by this file — confirm against the Credo documentation.
  use Credo.Check,
    # this check implements `run_on_all_source_files/3` (further down in this
    # module), which receives the whole list of source files at once
    run_on_all: true,
    base_priority: :higher,
    tags: [:controversial],
    # defaults of the user-configurable params; this module reads them through
    # `Params.get/3` and `param_defaults/0`
    param_defaults: [
      # AST nodes with a smaller mass are never hashed (see `collect_hashes/4`)
      mass_threshold: 40,
      # an issue requires at least `nodes_threshold - 1` other identical nodes
      # (see `issue_for/5`)
      nodes_threshold: 2,
      # names of calls/macros for which no issue is created (see `create_issue?/2`)
      excluded_macros: []
    ],
    # user-facing texts describing the check and each of its params
    explanations: [
      check: """
      Code should not be copy-pasted in a codebase when there is room to abstract
      the copied functionality in a meaningful way.

      That said, you should by no means "ABSTRACT ALL THE THINGS!".

      Sometimes it can serve a purpose to have code be explicit in two places, even
      if it means the snippets are nearly identical. A good example for this are
      Database Adapters in a project like Ecto, where you might have nearly
      identical functions for things like `order_by` or `limit` in both the
      Postgres and MySQL adapters.

      In this case, introducing an `AbstractAdapter` just to avoid code duplication
      might cause more trouble down the line than having a bit of duplicated code.

      Like all `Software Design` issues, this is just advice and might not be
      applicable to your project/situation.
      """,
      params: [
        mass_threshold:
          "The minimum mass which a part of code has to have to qualify for this check.",
        nodes_threshold: "The number of nodes that need to be found to raise an issue.",
        excluded_macros: "List of macros to be excluded for this check."
      ]
    ]
     37 
     38   alias Credo.SourceFile
     39 
     40   @doc false
     41   @impl true
     42   def run_on_all_source_files(exec, source_files, params) do
     43     mass_threshold = Params.get(params, :mass_threshold, __MODULE__)
     44     nodes_threshold = Params.get(params, :nodes_threshold, __MODULE__)
     45 
     46     source_files
     47     |> duplicate_nodes(mass_threshold)
     48     |> append_issues_via_issue_service(source_files, nodes_threshold, params, exec)
     49 
     50     :ok
     51   end
     52 
     53   defp append_issues_via_issue_service(found_hashes, source_files, nodes_threshold, params, exec)
     54        when is_map(found_hashes) do
     55     found_hashes
     56     |> Enum.map(
     57       &Task.async(fn ->
     58         do_append_issues_via_issue_service(
     59           &1,
     60           source_files,
     61           nodes_threshold,
     62           params,
     63           exec
     64         )
     65       end)
     66     )
     67     |> Enum.map(&Task.await(&1, :infinity))
     68   end
     69 
     70   defp do_append_issues_via_issue_service(
     71          {_hash, nodes},
     72          source_files,
     73          nodes_threshold,
     74          params,
     75          exec
     76        ) do
     77     filename_map = nodes |> Enum.map(&{&1.filename, true}) |> Enum.into(%{})
     78 
     79     source_files
     80     |> Enum.filter(fn source_file -> filename_map[source_file.filename] end)
     81     |> Enum.each(&new_issue_for_members(&1, nodes_threshold, nodes, params, exec))
     82   end
     83 
     84   defp new_issue_for_members(source_file, nodes_threshold, nodes, params, exec) do
     85     this_node = Enum.find(nodes, &(&1.filename == source_file.filename))
     86     other_nodes = List.delete(nodes, this_node)
     87     issue_meta = IssueMeta.for(source_file, params)
     88     issue = issue_for(issue_meta, this_node, other_nodes, nodes_threshold, params)
     89 
     90     if issue do
     91       Credo.Execution.ExecutionIssues.append(exec, source_file, issue)
     92     end
     93   end
     94 
     95   defp duplicate_nodes(source_files, mass_threshold) do
     96     chunked_nodes =
     97       source_files
     98       |> Enum.chunk_every(30)
     99       |> Enum.map(&Task.async(fn -> calculate_hashes_for_chunk(&1, mass_threshold) end))
    100       |> Enum.map(&Task.await(&1, :infinity))
    101 
    102     nodes =
    103       Enum.reduce(chunked_nodes, %{}, fn current_hashes, existing_hashes ->
    104         Map.merge(existing_hashes, current_hashes, fn _hash, node_items1, node_items2 ->
    105           node_items1 ++ node_items2
    106         end)
    107       end)
    108 
    109     nodes
    110     |> prune_hashes
    111     |> add_masses
    112   end
    113 
    114   defp calculate_hashes_for_chunk(source_files, mass_threshold) do
    115     Enum.reduce(source_files, %{}, fn source_file, acc ->
    116       ast = SourceFile.ast(source_file)
    117 
    118       calculate_hashes(ast, acc, source_file.filename, mass_threshold)
    119     end)
    120   end
    121 
    122   def add_masses(found_hashes) do
    123     Enum.into(found_hashes, %{}, &add_mass_to_subnode/1)
    124   end
    125 
    126   defp add_mass_to_subnode({hash, node_items}) do
    127     node_items =
    128       Enum.map(node_items, fn node_item ->
    129         %{node_item | mass: mass(node_item.node)}
    130       end)
    131 
    132     {hash, node_items}
    133   end
    134 
    135   @doc """
    136   Takes a map of hashes to nodes and prunes those nodes that are just
    137   subnodes of others in the same set.
    138 
    139   Returns the resulting map.
    140   """
    141   def prune_hashes(
    142         given_hashes,
    143         mass_threshold \\ param_defaults()[:mass_threshold]
    144       ) do
    145     # remove entries containing a single node
    146     hashes_with_multiple_nodes =
    147       given_hashes
    148       |> Enum.filter(fn {_hash, node_items} -> Enum.count(node_items) > 1 end)
    149       |> Enum.into(%{})
    150 
    151     hashes_to_prune =
    152       Enum.flat_map(
    153         hashes_with_multiple_nodes,
    154         &collect_subhashes(&1, mass_threshold)
    155       )
    156 
    157     delete_keys(hashes_to_prune, hashes_with_multiple_nodes)
    158   end
    159 
    160   defp delete_keys([], acc), do: acc
    161 
    162   defp delete_keys([head | tail], acc) do
    163     delete_keys(tail, Map.delete(acc, head))
    164   end
    165 
    166   defp collect_subhashes({_hash, node_items}, mass_threshold) do
    167     %{node: first_node, filename: filename} = Enum.at(node_items, 0)
    168 
    169     my_hash = first_node |> Credo.Code.remove_metadata() |> to_hash
    170     # don't count self
    171     subhashes =
    172       first_node
    173       |> calculate_hashes(%{}, filename, mass_threshold)
    174       |> Map.keys()
    175       |> List.delete(my_hash)
    176 
    177     subhashes
    178   end
    179 
    180   @doc """
    181   Calculates hash values for all sub nodes in a given +ast+.
    182 
    183   Returns a map with the hashes as keys and the nodes as values.
    184   """
    185   def calculate_hashes(
    186         ast,
    187         existing_hashes \\ %{},
    188         filename \\ "foo.ex",
    189         mass_threshold \\ param_defaults()[:mass_threshold]
    190       )
    191       when is_map(existing_hashes) do
    192     Credo.Code.prewalk(
    193       ast,
    194       &collect_hashes(&1, &2, filename, mass_threshold),
    195       existing_hashes
    196     )
    197   end
    198 
    199   defp collect_hashes(ast, existing_hashes, filename, mass_threshold) do
    200     if mass(ast) < mass_threshold do
    201       {ast, existing_hashes}
    202     else
    203       hash = ast |> Credo.Code.remove_metadata() |> to_hash
    204       node_item = %{node: ast, filename: filename, mass: nil}
    205       node_items = Map.get(existing_hashes, hash, [])
    206 
    207       updated_hashes = Map.put(existing_hashes, hash, node_items ++ [node_item])
    208 
    209       {ast, updated_hashes}
    210     end
    211   end
    212 
    213   @doc """
    214   Returns a hash-value for a given +ast+.
    215   """
    216   def to_hash(ast) do
    217     string =
    218       ast
    219       |> Inspect.Algebra.to_doc(%Inspect.Opts{})
    220       |> Inspect.Algebra.format(80)
    221       |> Enum.join("")
    222 
    223     :sha256
    224     |> :crypto.hash(string)
    225     |> Base.encode16()
    226   end
    227 
    228   @doc """
    229   Returns the mass (count of instructions) for an AST.
    230   """
    231   def mass(ast) do
    232     Credo.Code.prewalk(ast, &calc_mass/2, 0)
    233   end
    234 
    235   defp calc_mass(ast, acc) when is_tuple(ast) do
    236     {ast, acc + 1}
    237   end
    238 
    239   defp calc_mass(ast, acc) do
    240     {ast, acc}
    241   end
    242 
    243   defp issue_for(issue_meta, this_node, other_nodes, nodes_threshold, params) do
    244     if Enum.count(other_nodes) >= nodes_threshold - 1 do
    245       filenames =
    246         Enum.map_join(other_nodes, ", ", fn other_node ->
    247           "#{other_node.filename}:#{line_no_for(other_node.node)}"
    248         end)
    249 
    250       node_mass = this_node.mass
    251       line_no = line_no_for(this_node.node)
    252       excluded_macros = params[:excluded_macros] || []
    253 
    254       if create_issue?(this_node.node, excluded_macros) do
    255         format_issue(
    256           issue_meta,
    257           message: "Duplicate code found in #{filenames} (mass: #{node_mass}).",
    258           line_no: line_no,
    259           severity: Severity.compute(1 + Enum.count(other_nodes), 1)
    260         )
    261       end
    262     end
    263   end
    264 
    265   # ignore similar module attributes, no matter how complex
    266   def create_issue?({:@, _, _}, _), do: false
    267 
    268   def create_issue?([do: {atom, _, arguments}], excluded_macros)
    269       when is_atom(atom) and is_list(arguments) do
    270     !Enum.member?(excluded_macros, atom)
    271   end
    272 
    273   def create_issue?({atom, _, arguments}, excluded_macros)
    274       when is_atom(atom) and is_list(arguments) do
    275     !Enum.member?(excluded_macros, atom)
    276   end
    277 
    278   def create_issue?(_ast, _), do: true
    279 
    280   # TODO: Put in AST helper
    281 
    282   def line_no_for({:__block__, _meta, arguments}) do
    283     line_no_for(arguments)
    284   end
    285 
    286   def line_no_for({:do, arguments}) do
    287     line_no_for(arguments)
    288   end
    289 
    290   def line_no_for({atom, meta, _}) when is_atom(atom) do
    291     meta[:line]
    292   end
    293 
    294   def line_no_for(list) when is_list(list) do
    295     Enum.find_value(list, &line_no_for/1)
    296   end
    297 
    298   def line_no_for(nil), do: nil
    299 
    300   def line_no_for(block) do
    301     block
    302     |> Credo.Code.Block.do_block_for!()
    303     |> line_no_for
    304   end
    305 end