parser.ex (10684B)
defmodule AutoLinker.Parser do
  @moduledoc """
  Module to handle parsing the input string.
  """

  alias AutoLinker.Builder

  # NOTE(review): `$-_` inside the second character class is a *range* (from `$`
  # to `_`), which is what lets `/`, `:`, `?`, `=` etc. match. Do not "fix" it to
  # a literal `-` without widening the class, or `http://...` stops matching.
  @valid_url ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui

  @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"

  # @user
  # @user@example.com
  @match_mention ~r/^@[a-zA-Z\d_-]+(?:@.*)?/u

  # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
  @valid_email ~r/.*@.*/u

  @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u

  # Options that are switched on when neither the caller nor the app config sets them.
  @default_opts ~w(url)a

  @doc """
  Parse the given string, identifying items to link.

  Parses the string, replacing the matching urls and phone numbers with an html link.

  `input` is either a binary or a `{text, user_acc}` tuple. A binary input returns
  the parsed binary; a tuple input returns `{parsed_text, user_acc}`, where
  `user_acc` is threaded through every handler call.

  `opts` may be a map or a keyword list. It is merged over the `:auto_linker`
  application config (`:opts` and `:attributes`). The passes run in this order,
  each only when its option is present: `:phone` (any value other than `false`),
  `:hashtag` (`true`), `:markdown` (`true`), `:url` (any value other than
  `false`; enabled by default) and `:mention` (`true`).

  ## Examples

      iex> AutoLinker.Parser.parse("Check out http://google.com")
      ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">http://google.com</a>}
  """
  def parse(input, opts \\ %{})
  def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0)
  def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))

  def parse(input, opts) do
    config =
      :auto_linker
      |> Application.get_env(:opts, [])
      |> Enum.into(%{})
      |> Map.put(
        :attributes,
        Application.get_env(:auto_linker, :attributes, [])
      )

    opts =
      Enum.reduce(@default_opts, opts, fn opt, acc ->
        if is_nil(opts[opt]) and is_nil(config[opt]) do
          Map.put(acc, opt, true)
        else
          acc
        end
      end)

    do_parse(input, Map.merge(config, opts))
  end

  # do_parse/2 runs one pass per enabled option, removing the option afterwards
  # so the recursion terminates in the catch-all clause.
  defp do_parse(input, %{phone: false} = opts), do: do_parse(input, Map.delete(opts, :phone))
  defp do_parse(input, %{url: false} = opts), do: do_parse(input, Map.delete(opts, :url))

  defp do_parse(input, %{phone: _} = opts) do
    input
    |> do_parse(opts, {"", "", :parsing}, &check_and_link_phone/3)
    |> do_parse(Map.delete(opts, :phone))
  end

  defp do_parse(input, %{hashtag: true} = opts) do
    input
    |> do_parse(opts, {"", "", :parsing}, &check_and_link_hashtag/3)
    |> do_parse(Map.delete(opts, :hashtag))
  end

  defp do_parse({text, user_acc}, %{markdown: true} = opts) do
    linked = Builder.create_markdown_links(text, opts)
    do_parse({linked, user_acc}, Map.delete(opts, :markdown))
  end

  defp do_parse({text, user_acc}, %{url: _} = opts) do
    input =
      with exclude <- Map.get(opts, :exclude_patterns),
           true <- is_list(exclude),
           true <- String.starts_with?(text, exclude) do
        # The whole text starts with an excluded pattern: skip the url pass.
        {text, user_acc}
      else
        _ ->
          do_parse(
            {text, user_acc},
            opts,
            {"", "", :parsing},
            &check_and_link/3
          )
      end

    do_parse(input, Map.delete(opts, :url))
  end

  defp do_parse(input, %{mention: true} = opts) do
    input
    |> do_parse(opts, {"", "", :parsing}, &check_and_link_mention/3)
    |> do_parse(Map.delete(opts, :mention))
  end

  defp do_parse(input, _), do: input

  # do_parse/4 is the scanner. The third argument is `{buffer, acc, state}`:
  # `buffer` is the token being collected, `acc` the output built so far and
  # `state` one of :parsing, :skip, {:open, n}, {:attrs, n}, {:html, n},
  # {:close, n}. Clause order is significant.
  defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
    do: {acc, user_acc}

  # Content of <a>, <pre> and <code> elements is copied until the closing tag.
  defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)

  defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)

  defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)

  defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)

  defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)

  defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)

  # Any other tag: track nesting depth so text between tags is still handled.
  defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)

  defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
    do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
  end

  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
    do:
      do_parse(
        {text, user_acc},
        opts,
        {"", acc <> buffer <> ">", {:html, level}},
        handler
      )

  # Tag name / attribute bytes are copied through without running the handler.
  defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}, handler) do
    do_parse({text, user_acc}, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)
  end

  defp do_parse({"</" <> text, user_acc}, opts, {buffer, acc, {:html, level}}, handler) do
    {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)

    do_parse(
      {text, user_acc},
      opts,
      {"", acc <> buffer <> "</", {:close, level}},
      handler
    )
  end

  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, handler)

  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}, handler),
    do:
      do_parse(
        {text, user_acc},
        opts,
        {"", acc <> buffer <> ">", {:html, level - 1}},
        handler
      )

  defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
    do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
  end

  # default cases where state is not important

  # While the :phone option is present a space does not end the token, so
  # numbers written with spaces reach the handler in one piece.
  defp do_parse(
         {" " <> text, user_acc},
         %{phone: _} = opts,
         {buffer, acc, state},
         handler
       ),
       do: do_parse({text, user_acc}, opts, {buffer <> " ", acc, state}, handler)

  # Whitespace ends the current token: hand it to the handler and emit it.
  defp do_parse(
         {<<char::bytes-size(1), text::binary>>, user_acc},
         opts,
         {buffer, acc, state},
         handler
       )
       when char in [" ", "\r", "\n"] do
    {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)

    do_parse(
      {text, user_acc},
      opts,
      {"", acc <> buffer <> char, state},
      handler
    )
  end

  # Last byte of the input: it completes the token.
  defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}, handler) do
    {buffer, user_acc} = run_handler(handler, buffer <> <<ch::8>>, opts, user_acc)

    do_parse(
      {"", user_acc},
      opts,
      {"", acc <> buffer, state},
      handler
    )
  end

  defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, handler),
    do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, handler)

  # Input exhausted with a token still buffered. This happens when the :phone
  # pass swallows a trailing space into the buffer; without this clause such
  # input raised FunctionClauseError. Kept last so it never shadows a clause above.
  defp do_parse({"", user_acc}, opts, {buffer, acc, _state}, handler) do
    {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
    {acc <> buffer, user_acc}
  end

  @doc """
  Links `buffer` as a URL when it qualifies, otherwise returns it unchanged.

  `mailto:` and `xmpp:` buffers must also contain an `@`. Any other buffer must
  start with `http://`, `https://` or one of `opts[:extra_prefixes]`.
  """
  def check_and_link("mailto:" <> _ = buffer, opts, _user_acc) do
    buffer
    |> is_email?()
    |> link_url(buffer, opts)
  end

  def check_and_link("xmpp:" <> _ = buffer, opts, _user_acc) do
    buffer
    |> is_email?()
    |> link_url(buffer, opts)
  end

  def check_and_link(buffer, opts, _user_acc) do
    prefixes = ["http://", "https://"] ++ (opts[:extra_prefixes] || [])

    buffer
    |> is_prefixed_url?(prefixes)
    |> link_url(buffer, opts)
  end

  @doc """
  Links the phone numbers found in `buffer`; returns `buffer` unchanged when none match.
  """
  def check_and_link_phone(buffer, opts, _user_acc) do
    buffer
    |> match_phone
    |> link_phone(buffer, opts)
  end

  @doc """
  Links `buffer` when it starts with a mention (`@user` or `@user@host`).
  """
  def check_and_link_mention(buffer, opts, user_acc) do
    buffer
    |> match_mention
    |> link_mention(buffer, opts, user_acc)
  end

  @doc """
  Links `buffer` when it starts with a hashtag.
  """
  def check_and_link_hashtag(buffer, opts, user_acc) do
    buffer
    |> match_hashtag
    |> link_hashtag(buffer, opts, user_acc)
  end

  # @doc false
  def is_url?(buffer, _) do
    is_url?(buffer)
  end

  @doc """
  Returns `true` when `buffer` matches the URL pattern and starts with
  `http://`, `https://`, `xmpp:` or `mailto:`.
  """
  def is_url?(buffer) do
    is_prefixed_url?(buffer, ["http://", "https://", "xmpp:", "mailto:"])
  end

  @doc """
  Returns `true` when `buffer` matches the URL pattern and starts with one of `prefixes`.
  """
  def is_prefixed_url?(buffer, prefixes) do
    Regex.match?(@valid_url, buffer) and String.starts_with?(buffer, prefixes)
  end

  @doc """
  Returns `true` when `buffer` passes `is_url?/1` and contains an `@`.
  """
  def is_email?(buffer) do
    is_url?(buffer) and Regex.match?(@valid_email, buffer)
  end

  # Shimming
  def is_ip?(_buffer) do
    true
  end

  @doc false
  def match_phone(buffer) do
    case Regex.scan(@match_phone, buffer) do
      [] -> nil
      other -> other
    end
  end

  @doc """
  Returns the mention at the start of `buffer`, or `nil` when there is none.
  """
  def match_mention(buffer) do
    case Regex.run(@match_mention, buffer) do
      [mention] -> mention
      _ -> nil
    end
  end

  @doc """
  Returns the hashtag at the start of `buffer`, or `nil` when there is none.
  """
  def match_hashtag(buffer) do
    case Regex.run(@match_hashtag, buffer, capture: [:tag]) do
      [hashtag] -> hashtag
      _ -> nil
    end
  end

  @doc """
  Replaces `hashtag` inside `buffer` with its link.

  The link comes from `opts[:hashtag_handler]` when given (called with
  `hashtag, buffer, opts, user_acc`), otherwise from the builder. A handler may
  return either a binary or `{binary, user_acc}`; a binary result keeps the
  current `user_acc`. A `nil` hashtag returns `buffer` unchanged.
  """
  def link_hashtag(nil, buffer, _, _user_acc), do: buffer

  def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do
    hashtag
    |> hashtag_handler.(buffer, opts, user_acc)
    |> maybe_update_buffer(hashtag, buffer, user_acc)
  end

  def link_hashtag(hashtag, buffer, opts, user_acc) do
    hashtag
    |> Builder.create_hashtag_link(buffer, opts)
    |> maybe_update_buffer(hashtag, buffer, user_acc)
  end

  @doc """
  Replaces `mention` inside `buffer` with its link.

  The link comes from `opts[:mention_handler]` when given (called with
  `mention, buffer, opts, user_acc`), otherwise from the builder. A handler may
  return either a binary or `{binary, user_acc}`; a binary result keeps the
  current `user_acc`. A `nil` mention returns `{buffer, user_acc}`.
  """
  def link_mention(nil, buffer, _, user_acc), do: {buffer, user_acc}

  def link_mention(mention, buffer, %{mention_handler: mention_handler} = opts, user_acc) do
    mention
    |> mention_handler.(buffer, opts, user_acc)
    |> maybe_update_buffer(mention, buffer, user_acc)
  end

  def link_mention(mention, buffer, opts, user_acc) do
    mention
    |> Builder.create_mention_link(buffer, opts)
    |> maybe_update_buffer(mention, buffer, user_acc)
  end

  # Normalises a handler result. A bare binary is paired with the accumulator
  # that was in effect before the handler ran (it used to be paired with `nil`,
  # which wiped the caller's accumulator).
  defp maybe_update_buffer(out, match, buffer, user_acc) when is_binary(out) do
    maybe_update_buffer({out, user_acc}, match, buffer, user_acc)
  end

  # The match is only part of the buffer (e.g. trailing punctuation): splice the
  # link into the buffer instead of replacing the whole token.
  defp maybe_update_buffer({out, user_acc}, match, buffer, _previous_acc)
       when match != buffer and out != buffer do
    {String.replace(buffer, match, out), user_acc}
  end

  defp maybe_update_buffer(out, _match, _buffer, _previous_acc), do: out

  @doc """
  Builds the phone link(s) for the matches in `list`; a `nil` match list returns
  `buffer` unchanged.
  """
  def link_phone(nil, buffer, _), do: buffer

  def link_phone(list, buffer, opts) do
    Builder.create_phone_link(list, buffer, opts)
  end

  @doc false
  def link_url(true, buffer, opts) do
    Builder.create_link(buffer, opts)
  end

  def link_url(_, buffer, _opts), do: buffer

  @doc false
  def link_email(true, buffer, opts) do
    Builder.create_email_link(buffer, opts)
  end

  def link_email(_, buffer, _opts), do: buffer

  # Calls the handler and normalises its result to `{buffer, user_acc}`; a
  # handler that returns a bare buffer leaves the accumulator unchanged.
  defp run_handler(handler, buffer, opts, user_acc) do
    case handler.(buffer, opts, user_acc) do
      {buffer, user_acc} -> {buffer, user_acc}
      buffer -> {buffer, user_acc}
    end
  end
end