duplicated_code.ex (8636B)
defmodule Credo.Check.Design.DuplicatedCode do
  use Credo.Check,
    run_on_all: true,
    base_priority: :higher,
    tags: [:controversial],
    param_defaults: [
      mass_threshold: 40,
      nodes_threshold: 2,
      excluded_macros: []
    ],
    explanations: [
      check: """
      Code should not be copy-pasted in a codebase when there is room to abstract
      the copied functionality in a meaningful way.

      That said, you should by no means "ABSTRACT ALL THE THINGS!".

      Sometimes it can serve a purpose to have code be explicit in two places, even
      if it means the snippets are nearly identical. A good example for this are
      Database Adapters in a project like Ecto, where you might have nearly
      identical functions for things like `order_by` or `limit` in both the
      Postgres and MySQL adapters.

      In this case, introducing an `AbstractAdapter` just to avoid code duplication
      might cause more trouble down the line than having a bit of duplicated code.

      Like all `Software Design` issues, this is just advice and might not be
      applicable to your project/situation.
      """,
      params: [
        mass_threshold:
          "The minimum mass which a part of code has to have to qualify for this check.",
        nodes_threshold: "The number of nodes that need to be found to raise an issue.",
        excluded_macros: "List of macros to be excluded for this check."
      ]
    ]

  alias Credo.SourceFile

  @doc false
  @impl true
  def run_on_all_source_files(exec, source_files, params) do
    mass_threshold = Params.get(params, :mass_threshold, __MODULE__)
    nodes_threshold = Params.get(params, :nodes_threshold, __MODULE__)

    source_files
    |> duplicate_nodes(mass_threshold)
    |> append_issues_via_issue_service(source_files, nodes_threshold, params, exec)

    :ok
  end

  # Spawns one task per `{hash, nodes}` entry and waits for all of them, so
  # issues for different duplicate groups are appended concurrently.
  defp append_issues_via_issue_service(found_hashes, source_files, nodes_threshold, params, exec)
       when is_map(found_hashes) do
    found_hashes
    |> Enum.map(fn hash_and_nodes ->
      Task.async(fn ->
        do_append_issues_via_issue_service(
          hash_and_nodes,
          source_files,
          nodes_threshold,
          params,
          exec
        )
      end)
    end)
    |> Enum.map(&Task.await(&1, :infinity))
  end

  # Visits every source file that contains at least one node of the given
  # duplicate group and tries to create an issue for it.
  defp do_append_issues_via_issue_service(
         {_hash, nodes},
         source_files,
         nodes_threshold,
         params,
         exec
       ) do
    filenames = MapSet.new(nodes, & &1.filename)

    source_files
    |> Enum.filter(&MapSet.member?(filenames, &1.filename))
    |> Enum.each(&new_issue_for_members(&1, nodes_threshold, nodes, params, exec))
  end

  # Picks the first node of the group located in `source_file`, treats all
  # remaining nodes as "the others" and appends an issue if one is warranted.
  defp new_issue_for_members(source_file, nodes_threshold, nodes, params, exec) do
    this_node = Enum.find(nodes, &(&1.filename == source_file.filename))
    other_nodes = List.delete(nodes, this_node)
    issue_meta = IssueMeta.for(source_file, params)
    issue = issue_for(issue_meta, this_node, other_nodes, nodes_threshold, params)

    if issue do
      Credo.Execution.ExecutionIssues.append(exec, source_file, issue)
    end
  end

  # Hashes all source files (in concurrent chunks of 30 files), merges the
  # per-chunk results and reduces them to the hashes that occur more than once.
  defp duplicate_nodes(source_files, mass_threshold) do
    source_files
    |> Enum.chunk_every(30)
    |> Enum.map(fn chunk ->
      Task.async(fn -> calculate_hashes_for_chunk(chunk, mass_threshold) end)
    end)
    |> Enum.map(&Task.await(&1, :infinity))
    |> Enum.reduce(%{}, fn chunk_hashes, merged_hashes ->
      Map.merge(merged_hashes, chunk_hashes, fn _hash, node_items1, node_items2 ->
        node_items1 ++ node_items2
      end)
    end)
    # Pruning must use the same threshold the hashes were collected with;
    # otherwise sub-nodes whose mass lies between the configured and the
    # default threshold are not recognised as sub-nodes and are reported too.
    |> prune_hashes(mass_threshold)
    |> add_masses()
  end

  defp calculate_hashes_for_chunk(source_files, mass_threshold) do
    Enum.reduce(source_files, %{}, fn source_file, acc ->
      ast = SourceFile.ast(source_file)

      calculate_hashes(ast, acc, source_file.filename, mass_threshold)
    end)
  end

  @doc """
  Takes a map of hashes to node items and fills in the `:mass` of every item.

  Returns the resulting map.
  """
  def add_masses(found_hashes) do
    Map.new(found_hashes, &add_mass_to_subnode/1)
  end

  defp add_mass_to_subnode({hash, node_items}) do
    node_items_with_mass =
      Enum.map(node_items, fn node_item ->
        %{node_item | mass: mass(node_item.node)}
      end)

    {hash, node_items_with_mass}
  end

  @doc """
  Takes a map of hashes to nodes and prunes those nodes that are just
  subnodes of others in the same set.

  Entries that hold only a single node are dropped as well, since they are
  not duplicated.

  Returns the resulting map.
  """
  def prune_hashes(
        given_hashes,
        mass_threshold \\ param_defaults()[:mass_threshold]
      ) do
    # remove entries containing a single node
    hashes_with_multiple_nodes =
      given_hashes
      |> Enum.filter(fn {_hash, node_items} -> match?([_, _ | _], node_items) end)
      |> Map.new()

    hashes_to_prune =
      Enum.flat_map(
        hashes_with_multiple_nodes,
        &collect_subhashes(&1, mass_threshold)
      )

    Map.drop(hashes_with_multiple_nodes, hashes_to_prune)
  end

  # Returns the hashes of all sub-nodes of the first node of the given entry,
  # excluding the hash of that node itself.
  defp collect_subhashes(
         {_hash, [%{node: first_node, filename: filename} | _]},
         mass_threshold
       ) do
    own_hash = first_node |> Credo.Code.remove_metadata() |> to_hash()

    first_node
    |> calculate_hashes(%{}, filename, mass_threshold)
    |> Map.keys()
    # don't count self
    |> List.delete(own_hash)
  end

  @doc """
  Calculates hash values for all sub nodes in a given `ast` whose mass is at
  least `mass_threshold`.

  Returns a map with the hashes as keys and lists of node items as values.
  Each node item is a map with the keys `:node`, `:filename` and `:mass`
  (the latter is `nil` until `add_masses/1` is applied).
  """
  def calculate_hashes(
        ast,
        existing_hashes \\ %{},
        filename \\ "foo.ex",
        mass_threshold \\ param_defaults()[:mass_threshold]
      )
      when is_map(existing_hashes) do
    Credo.Code.prewalk(
      ast,
      &collect_hashes(&1, &2, filename, mass_threshold),
      existing_hashes
    )
  end

  defp collect_hashes(ast, existing_hashes, filename, mass_threshold) do
    if mass(ast) >= mass_threshold do
      hash = ast |> Credo.Code.remove_metadata() |> to_hash()
      node_item = %{node: ast, filename: filename, mass: nil}

      # Items are appended (not prepended) on purpose: the order of items
      # decides which node is used by `collect_subhashes/2` and which node
      # of a file an issue is reported for.
      updated_hashes =
        Map.update(existing_hashes, hash, [node_item], &(&1 ++ [node_item]))

      {ast, updated_hashes}
    else
      {ast, existing_hashes}
    end
  end

  @doc """
  Returns a hash-value for a given `ast`.

  The hash is the upper-case hex encoded SHA-256 of the inspected `ast`.
  """
  def to_hash(ast) do
    # The default `Inspect.Opts` limits (`limit: 50`, `printable_limit: 4096`)
    # elide parts of larger terms with "...". Hashing such a shortened
    # representation could make different ASTs that share a common beginning
    # look identical, so both limits are lifted here.
    inspect_opts = %Inspect.Opts{limit: :infinity, printable_limit: :infinity}

    string =
      ast
      |> Inspect.Algebra.to_doc(inspect_opts)
      |> Inspect.Algebra.format(80)
      |> Enum.join("")

    :sha256
    |> :crypto.hash(string)
    |> Base.encode16()
  end

  @doc """
  Returns the mass for an AST, i.e. the number of tuples visited while
  walking it.
  """
  def mass(ast) do
    Credo.Code.prewalk(ast, &calc_mass/2, 0)
  end

  defp calc_mass(ast, acc) when is_tuple(ast), do: {ast, acc + 1}
  defp calc_mass(ast, acc), do: {ast, acc}

  # Builds the issue for `this_node` or returns `nil` if either the group is
  # smaller than `nodes_threshold` or the node is excluded from reporting.
  defp issue_for(issue_meta, this_node, other_nodes, nodes_threshold, params) do
    excluded_macros = params[:excluded_macros] || []
    occurrences = 1 + length(other_nodes)

    if occurrences >= nodes_threshold and create_issue?(this_node.node, excluded_macros) do
      filenames =
        Enum.map_join(other_nodes, ", ", fn other_node ->
          "#{other_node.filename}:#{line_no_for(other_node.node)}"
        end)

      format_issue(
        issue_meta,
        message: "Duplicate code found in #{filenames} (mass: #{this_node.mass}).",
        line_no: line_no_for(this_node.node),
        severity: Severity.compute(occurrences, 1)
      )
    end
  end

  @doc """
  Returns `false` if no issue should be created for the given AST node,
  i.e. for module attributes and for calls to one of the `excluded_macros`.
  """
  # ignore similar module attributes, no matter how complex
  def create_issue?({:@, _, _}, _), do: false

  def create_issue?([do: {atom, _, arguments}], excluded_macros)
      when is_atom(atom) and is_list(arguments) do
    !Enum.member?(excluded_macros, atom)
  end

  def create_issue?({atom, _, arguments}, excluded_macros)
      when is_atom(atom) and is_list(arguments) do
    !Enum.member?(excluded_macros, atom)
  end

  def create_issue?(_ast, _), do: true

  # TODO: Put in AST helper

  @doc """
  Returns the line number found in the metadata of the given AST (or of the
  first element/sub-block providing one), or `nil` if there is none.
  """
  def line_no_for({:__block__, _meta, arguments}) do
    line_no_for(arguments)
  end

  def line_no_for({:do, arguments}) do
    line_no_for(arguments)
  end

  def line_no_for({atom, meta, _}) when is_atom(atom) do
    meta[:line]
  end

  def line_no_for(list) when is_list(list) do
    Enum.find_value(list, &line_no_for/1)
  end

  def line_no_for(nil), do: nil

  def line_no_for(block) do
    block
    |> Credo.Code.Block.do_block_for!()
    |> line_no_for()
  end
end