groups.ex (11000B)
defmodule Makeup.Lexer.Groups do
  @moduledoc """
  Utilities to highlight groups of tokens on mouseover.

  The typical example is to highlight matching pairs of delimiters,
  such as parenthesis, angle brackets, etc.
  """

  # Builds an AST pattern that matches the first `length(patterns)` tokens of a
  # token list. Each pattern is matched AND captured into the corresponding
  # variable from `varnames` (`pattern = var`), and the tail of the list is
  # bound to the variable named `rest_of_tokens_varname`.
  #
  # Base case: no more patterns — the remaining list is bound to the
  # "rest of tokens" variable.
  defp make_match([] = _patterns, _varnames, rest_of_tokens_varname) do
    quote do
      unquote(Macro.var(rest_of_tokens_varname, __MODULE__))
    end
  end

  # Recursive case: emit one cons cell `[pattern = var | <rest>]` per pattern.
  defp make_match([pattern | patterns], [varname | varnames], rest_of_tokens_varname) do
    var = Macro.var(varname, __MODULE__)

    quote do
      [
        unquote(pattern) = unquote(var)
        | unquote(make_match(patterns, varnames, rest_of_tokens_varname))
      ]
    end
  end

  # Builds the AST for a list of token tuples `{ttype, attrs, text}` in which
  # each token's attribute map gains a `group_id_varname => group_id` entry.
  #
  # With no tokens there is nothing to tag — emit an empty list literal.
  defp put_group_ids([], _group_id_varname) do
    quote(do: [])
  end

  # `tokens` is a list of `{ttype_varname, attr_varname, text_varname}` atom
  # triples naming the variables (bound by the surrounding generated clause)
  # that hold each token's parts.
  defp put_group_ids(tokens, group_id_varname) do
    group_id = Macro.var(group_id_varname, __MODULE__)

    for {ttype_varname, attr_varname, text_varname} <- tokens do
      ttype = Macro.var(ttype_varname, __MODULE__)
      attr = Macro.var(attr_varname, __MODULE__)
      text = Macro.var(text_varname, __MODULE__)

      quote do
        {
          unquote(ttype),
          Map.put(
            unquote(attr),
            # The Map key (an atom)
            unquote(group_id_varname),
            # The variable holding the value
            unquote(group_id)
          ),
          unquote(text)
        }
      end
    end
  end

  # Builds one `case` clause (for the `{stack, tokens}` case in the generated
  # helper) that recognizes an OPENING delimiter sequence:
  #   * allocates a fresh group number (`group_nr + 1`),
  #   * tags the matched tokens with the new group id,
  #   * pushes `{stack_name, new_group_nr}` onto the stack.
  #
  # `pattern` is a list of token patterns (one per token in the delimiter),
  # so a delimiter may span several consecutive tokens.
  defp open_branch(stack_name, pattern, group_prefix_varname, group_nr_varname) do
    group_nr = Macro.var(group_nr_varname, __MODULE__)
    group_prefix = Macro.var(group_prefix_varname, __MODULE__)
    group_id = Macro.var(:group_id, __MODULE__)
    rest_of_tokens = Macro.var(:rest_of_tokens, __MODULE__)

    n = length(pattern)

    # Generate one set of variable names per token in the delimiter pattern.
    # `String.to_atom/1` is safe here: input is compile-time, bounded by `n`.
    token_varnames = for i <- 1..n, do: String.to_atom("token__#{i}")
    ttype_varnames = for i <- 1..n, do: String.to_atom("ttype__#{i}")
    attr_varnames = for i <- 1..n, do: String.to_atom("attr__#{i}")
    text_varnames = for i <- 1..n, do: String.to_atom("text__#{i}")
    tokens_data = List.zip([token_varnames, ttype_varnames, attr_varnames, text_varnames])

    # For each captured token, destructure it into its ttype/attr/text parts
    # so `put_group_ids/2` can rebuild it with the group id added.
    pattern_matches =
      for {token_varname, ttype_varname, attr_varname, text_varname} <- tokens_data do
        token = Macro.var(token_varname, __MODULE__)
        ttype = Macro.var(ttype_varname, __MODULE__)
        attr = Macro.var(attr_varname, __MODULE__)
        text = Macro.var(text_varname, __MODULE__)

        quote do
          {unquote(ttype), unquote(attr), unquote(text)} = unquote(token)
        end
      end

    tokens_pattern = make_match(pattern, token_varnames, :rest_of_tokens)

    tokens_for_result = List.zip([ttype_varnames, attr_varnames, text_varnames])
    head_tokens = put_group_ids(tokens_for_result, :group_id)

    # The generated clause returns `{new_stack, new_group_nr, head_tokens,
    # rest_of_tokens}`, the shape consumed by the generated helper function.
    quote do
      {stack, unquote(tokens_pattern)} ->
        new_group_nr = unquote(group_nr) + 1
        unquote(group_id) = unquote(group_prefix) <> "-" <> to_string(new_group_nr)
        unquote_splicing(pattern_matches)
        head_tokens = unquote(head_tokens)
        head_of_stack = {unquote(stack_name), new_group_nr}
        new_stack = [head_of_stack | stack]
        {new_stack, new_group_nr, head_tokens, unquote(rest_of_tokens)}
    end
  end

  # Builds one `case` clause that recognizes a CLOSING delimiter sequence.
  # It only matches when the top of the stack carries the same `stack_name`
  # (so e.g. a `)` never closes a `do`); the matched tokens are tagged with
  # the group number found on the stack, and that stack entry is popped.
  # The global group counter (`group_nr`) is left unchanged.
  defp close_branch(stack_name, pattern, group_prefix_varname, group_nr_varname) do
    group_nr = Macro.var(group_nr_varname, __MODULE__)
    group_prefix = Macro.var(group_prefix_varname, __MODULE__)
    group_id = Macro.var(:group_id, __MODULE__)
    rest_of_stack = Macro.var(:rest_of_stack, __MODULE__)
    rest_of_tokens = Macro.var(:rest_of_tokens, __MODULE__)

    n = length(pattern)

    # Same compile-time variable-name generation as in `open_branch/4`.
    token_varnames = for i <- 1..n, do: String.to_atom("token__#{i}")
    ttype_varnames = for i <- 1..n, do: String.to_atom("ttype__#{i}")
    attr_varnames = for i <- 1..n, do: String.to_atom("attr__#{i}")
    text_varnames = for i <- 1..n, do: String.to_atom("text__#{i}")
    tokens_data = List.zip([token_varnames, ttype_varnames, attr_varnames, text_varnames])

    pattern_matches =
      for {token_varname, ttype_varname, attr_varname, text_varname} <- tokens_data do
        token = Macro.var(token_varname, __MODULE__)
        ttype = Macro.var(ttype_varname, __MODULE__)
        attr = Macro.var(attr_varname, __MODULE__)
        text = Macro.var(text_varname, __MODULE__)

        quote do
          {unquote(ttype), unquote(attr), unquote(text)} = unquote(token)
        end
      end

    # Only fire when the innermost open group belongs to this stack_name.
    stack_pattern =
      quote do
        [{unquote(stack_name), current_group_nr} | unquote(rest_of_stack)]
      end

    tokens_pattern = make_match(pattern, token_varnames, :rest_of_tokens)

    tokens_for_result = List.zip([ttype_varnames, attr_varnames, text_varnames])
    head_tokens = put_group_ids(tokens_for_result, :group_id)

    quote do
      {unquote(stack_pattern), unquote(tokens_pattern)} ->
        # Reuse the group number recorded when the group was opened.
        unquote(group_id) = unquote(group_prefix) <> "-" <> to_string(current_group_nr)
        unquote_splicing(pattern_matches)
        head_tokens = unquote(head_tokens)
        {unquote(rest_of_stack), unquote(group_nr), head_tokens, unquote(rest_of_tokens)}
    end
  end

  # Builds one `case` clause that recognizes a MIDDLE delimiter (e.g. `else`
  # between `do` and `end`). Like `close_branch/4` it tags the tokens with the
  # group number from the top of the stack, but the stack is left untouched —
  # the group stays open until its closing delimiter arrives.
  defp middle_branch(stack_name, pattern, group_prefix_varname, group_nr_varname) do
    group_nr = Macro.var(group_nr_varname, __MODULE__)

    group_prefix = Macro.var(group_prefix_varname, __MODULE__)
    group_id = Macro.var(:group_id, __MODULE__)
    rest_of_stack = Macro.var(:rest_of_stack, __MODULE__)
    rest_of_tokens = Macro.var(:rest_of_tokens, __MODULE__)

    n = length(pattern)

    token_varnames = for i <- 1..n, do: String.to_atom("token__#{i}")
    ttype_varnames = for i <- 1..n, do: String.to_atom("ttype__#{i}")
    attr_varnames = for i <- 1..n, do: String.to_atom("attr__#{i}")
    text_varnames = for i <- 1..n, do: String.to_atom("text__#{i}")
    tokens_data = List.zip([token_varnames, ttype_varnames, attr_varnames, text_varnames])

    pattern_matches =
      for {token_varname, ttype_varname, attr_varname, text_varname} <- tokens_data do
        token = Macro.var(token_varname, __MODULE__)
        ttype = Macro.var(ttype_varname, __MODULE__)
        attr = Macro.var(attr_varname, __MODULE__)
        text = Macro.var(text_varname, __MODULE__)

        quote do
          {unquote(ttype), unquote(attr), unquote(text)} = unquote(token)
        end
      end

    stack_pattern =
      quote do
        [{unquote(stack_name), current_group_nr} | unquote(rest_of_stack)]
      end

    tokens_pattern = make_match(pattern, token_varnames, :rest_of_tokens)

    tokens_for_result = List.zip([ttype_varnames, attr_varnames, text_varnames])
    head_tokens = put_group_ids(tokens_for_result, :group_id)

    quote do
      # Note the `= stack`: the whole stack is rebound and returned unchanged.
      {unquote(stack_pattern) = stack, unquote(tokens_pattern)} ->
        unquote(group_id) = unquote(group_prefix) <> "-" <> to_string(current_group_nr)
        unquote_splicing(pattern_matches)
        head_tokens = unquote(head_tokens)
        {stack, unquote(group_nr), head_tokens, unquote(rest_of_tokens)}
    end
  end

  # Expands one `{stack_name, open: ..., middle: ..., close: ...}` spec into
  # the full list of generated `case` clauses for that delimiter group.
  # `:open` and `:close` are mandatory; `:middle` is optional.
  # Clause order (open, middle, close) determines match priority in the
  # generated `case`.
  defp branches_for_stack({stack_name, parts}) do
    open_patterns = Keyword.fetch!(parts, :open)
    middle_patterns = Keyword.get(parts, :middle, [])
    close_patterns = Keyword.fetch!(parts, :close)

    open_branches_ast =
      Enum.map(
        open_patterns,
        fn pattern -> open_branch(stack_name, pattern, :group_prefix, :group_nr) end
      )

    middle_branches_ast =
      Enum.map(
        middle_patterns,
        fn pattern -> middle_branch(stack_name, pattern, :group_prefix, :group_nr) end
      )

    close_branches_ast =
      Enum.map(
        close_patterns,
        fn pattern -> close_branch(stack_name, pattern, :group_prefix, :group_nr) end
      )

    open_branches_ast ++ middle_branches_ast ++ close_branches_ast
  end

  @doc """
  Defines a function with the given `name` that takes a list of tokens and divides
  matching delimiters into groups.

  Takes as arguments a `name` for the function (must be an atom) and a list
  containing the patterns describing the matching groups.

  Passing `debug: true` in `opts` prints the generated function definitions
  at compile time.

  ## Examples

      # Extracted from the default elixir lexer that ships with ExDoc
      defgroupmatcher :match_groups, [
        # Match opening and closing parenthesis
        parentheses: [
          open: [[{:punctuation, %{language: :elixir}, "("}]],
          close: [[{:punctuation, %{language: :elixir}, ")"}]]
        ],

        # Match more complex delimiters, but still an open and close delimiter
        fn_end: [
          open: [[{:keyword, %{language: :elixir}, "fn"}]],
          close: [[{:keyword, %{language: :elixir}, "end"}]]
        ],

        # Match delimiters with middle components
        do_end: [
          open: [
            [{:keyword, %{language: :elixir}, "do"}]
          ],
          middle: [
            [{:keyword, %{language: :elixir}, "else"}],
            [{:keyword, %{language: :elixir}, "catch"}],
            [{:keyword, %{language: :elixir}, "rescue"}],
            [{:keyword, %{language: :elixir}, "after"}]
          ],
          close: [
            [{:keyword, %{language: :elixir}, "end"}]
          ]
        ]
      ]
  """
  defmacro defgroupmatcher(name, stacks, opts \\ []) do
    # Name for the private recursive worker, e.g. `:match_groups__helper`.
    name_helper =
      name
      |> Atom.to_string()
      |> Kernel.<>("__helper")
      |> String.to_atom()

    # All generated case clauses, across every delimiter group, in order.
    branches =
      stacks
      |> Enum.map(&branches_for_stack/1)
      |> List.flatten()

    group_nr = Macro.var(:group_nr, __MODULE__)

    # Fallback: a token that opens/closes nothing passes through unchanged.
    unmatched_token_branch =
      quote do
        {old_stack, [token | toks]} ->
          {old_stack, unquote(group_nr), [token], toks}
      end

    # Termination: no tokens left. An empty `head_tokens` stops the recursion
    # in the generated helper below.
    no_more_tokens_branch =
      quote do
        {old_stack, []} ->
          {old_stack, unquote(group_nr), [], []}
      end

    # Each quoted `x -> y` block is already a list of clause ASTs, so plain
    # concatenation yields the full clause list for the generated `case`.
    all_branches = branches ++ unmatched_token_branch ++ no_more_tokens_branch

    expr =
      quote do
        def unquote(name)(tokens, group_prefix \\ "group") do
          unquote(name_helper)([], tokens, group_prefix, 0) |> :lists.flatten()
        end

        defp unquote(name_helper)(stack, tokens, group_prefix, group_nr) do
          {new_stack, new_group_nr, head_tokens, rest_of_tokens} =
            case {stack, tokens} do
              unquote(all_branches)
            end

          case head_tokens do
            [] ->
              []

            _ ->
              # Don't worry about the nested list, we'll flatten it later
              [
                head_tokens
                | unquote(name_helper)(new_stack, rest_of_tokens, group_prefix, new_group_nr)
              ]
          end
        end
      end

    # Compile-time debugging aid: pretty-print the generated definitions.
    if Keyword.get(opts, :debug) do
      expr
      |> Macro.to_string()
      |> Code.format_string!()
      |> IO.puts()
    end

    expr
  end

  @doc """
  Returns a random prefix for group ids in an HTML file.

  This is useful to avoid collisions.
  The group ids should be unique for a certain HTML document, and the easiest way of guaranteeing it
  is by generating long random prefixes.
  """
  # `n` random decimal digits, returned as a binary.
  def random_prefix(n), do: Enum.map(1..n, fn _ -> Enum.random(?0..?9) end) |> to_string
end