logo

auto_linker

AutoLinker-shim, based on https://git.pleroma.social/pleroma/auto_linker

parser.ex (10684B)


      1 defmodule AutoLinker.Parser do
      2   @moduledoc """
      3   Module to handle parsing the input string.
      4   """
      5 
      6   alias AutoLinker.Builder
      7 
      8   @doc """
      9   Parse the given string, identifying items to link.
     10 
     11   Parses the string, replacing the matching URLs and phone numbers with HTML links.
     12 
     13   ## Examples
     14 
     15       iex> AutoLinker.Parser.parse("Check out http://google.com")
     16       ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">http://google.com</a>}
     17   """
     18 
          # Unanchored pattern: a scheme-like run of characters, a colon, then at least one
          # further character. Compiled with `u` (Unicode) and `i` (case-insensitive).
          # Used by is_prefixed_url?/2 together with an explicit prefix check.
          # NOTE(review): inside the second character class `$-_` is a *range* from `$` to
          # `_`, not three literal characters - confirm that this is intended.
     19   @valid_url ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui
     20 
          # Phone pattern scanned over a token by match_phone/1. Two alternatives:
          #   * an extension: `x` followed by 2-7 digits
          #   * an optional `+1`/`1` prefix, a three-digit group (optionally parenthesised),
          #     another three-digit group and four digits, with optional `.`/`-` and single
          #     whitespace separators between the groups.
          # NOTE(review): the digit restrictions look like North American numbering rules -
          # confirm before relying on it for other formats.
     21   @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
     22 
          # Mention at the very start of a token (anchored with `^`). Accepted shapes:
     23   # @user
     24   # @user@example.com
          # Everything after a second `@` is consumed by `.*`.
     25   @match_mention ~r/^@[a-zA-Z\d_-]+(?:@.*)?/u
     26 
     27   # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
          # NOTE(review): the pattern below only requires an `@` somewhere in the string;
          # it does not implement the grammar from the specification linked above.
     28   @valid_email ~r/.*@.*/u
     29 
          # Hashtag at the start of a token; the match is exposed as the named capture `tag`
          # (read by match_hashtag/1). At least one alphabetic, `_` or `·` character is
          # required after the `#`.
     30   @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
     31 
          # Options that parse/2 switches on when neither the caller nor the application
          # config sets them. Evaluates to `[:url]`.
     32   @default_opts ~w(url)a
     33 
     34   def parse(input, opts \\ %{})
     35   def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0)
     36   def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))
     37 
     38   def parse(input, opts) do
     39     config =
     40       :auto_linker
     41       |> Application.get_env(:opts, [])
     42       |> Enum.into(%{})
     43       |> Map.put(
     44         :attributes,
     45         Application.get_env(:auto_linker, :attributes, [])
     46       )
     47 
     48     opts =
     49       Enum.reduce(@default_opts, opts, fn opt, acc ->
     50         if is_nil(opts[opt]) and is_nil(config[opt]) do
     51           Map.put(acc, opt, true)
     52         else
     53           acc
     54         end
     55       end)
     56 
     57     do_parse(input, Map.merge(config, opts))
     58   end
     59 
     60   defp do_parse(input, %{phone: false} = opts), do: do_parse(input, Map.delete(opts, :phone))
     61   defp do_parse(input, %{url: false} = opts), do: do_parse(input, Map.delete(opts, :url))
     62 
     63   defp do_parse(input, %{phone: _} = opts) do
     64     input
     65     |> do_parse(opts, {"", "", :parsing}, &check_and_link_phone/3)
     66     |> do_parse(Map.delete(opts, :phone))
     67   end
     68 
     69   defp do_parse(input, %{hashtag: true} = opts) do
     70     input
     71     |> do_parse(opts, {"", "", :parsing}, &check_and_link_hashtag/3)
     72     |> do_parse(Map.delete(opts, :hashtag))
     73   end
     74 
     75   defp do_parse({text, user_acc}, %{markdown: true} = opts) do
     76     text
     77     |> Builder.create_markdown_links(opts)
     78     |> (&{&1, user_acc}).()
     79     |> do_parse(Map.delete(opts, :markdown))
     80   end
     81 
     82   defp do_parse({text, user_acc}, %{url: _} = opts) do
     83     input =
     84       with exclude <- Map.get(opts, :exclude_patterns),
     85            true <- is_list(exclude),
     86            true <- String.starts_with?(text, exclude) do
     87         {text, user_acc}
     88       else
     89         _ ->
     90           do_parse(
     91             {text, user_acc},
     92             opts,
     93             {"", "", :parsing},
     94             &check_and_link/3
     95           )
     96       end
     97 
     98     do_parse(input, Map.delete(opts, :url))
     99   end
    100 
    101   defp do_parse(input, %{mention: true} = opts) do
    102     input
    103     |> do_parse(opts, {"", "", :parsing}, &check_and_link_mention/3)
    104     |> do_parse(Map.delete(opts, :mention))
    105   end
    106 
    107   defp do_parse(input, _), do: input
    108 
    109   defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
    110     do: {acc, user_acc}
    111 
    112   defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    113     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)
    114 
    115   defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    116     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)
    117 
    118   defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    119     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)
    120 
    121   defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    122     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)
    123 
    124   defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    125     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)
    126 
    127   defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    128     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)
    129 
    130   defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
    131     do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)
    132 
    133   defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
    134     do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
    135   end
    136 
    137   defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
    138     do:
    139       do_parse(
    140         {text, user_acc},
    141         opts,
    142         {"", acc <> buffer <> ">", {:html, level}},
    143         handler
    144       )
    145 
    146   defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}, handler) do
    147     do_parse({text, user_acc}, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)
    148   end
    149 
    150   defp do_parse({"</" <> text, user_acc}, opts, {buffer, acc, {:html, level}}, handler) do
    151     {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
    152 
    153     do_parse(
    154       {text, user_acc},
    155       opts,
    156       {"", acc <> buffer <> "</", {:close, level}},
    157       handler
    158     )
    159   end
    160 
    161   defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}, handler),
    162     do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, handler)
    163 
    164   defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}, handler),
    165     do:
    166       do_parse(
    167         {text, user_acc},
    168         opts,
    169         {"", acc <> buffer <> ">", {:html, level - 1}},
    170         handler
    171       )
    172 
    173   defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
    174     do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
    175   end
    176 
    177   # default cases where state is not important
    178   defp do_parse(
    179          {" " <> text, user_acc},
    180          %{phone: _} = opts,
    181          {buffer, acc, state},
    182          handler
    183        ),
    184        do: do_parse({text, user_acc}, opts, {buffer <> " ", acc, state}, handler)
    185 
    186   defp do_parse(
    187          {<<char::bytes-size(1), text::binary>>, user_acc},
    188          opts,
    189          {buffer, acc, state},
    190          handler
    191        )
    192        when char in [" ", "\r", "\n"] do
    193     {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
    194 
    195     do_parse(
    196       {text, user_acc},
    197       opts,
    198       {"", acc <> buffer <> char, state},
    199       handler
    200     )
    201   end
    202 
    203   defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}, handler) do
    204     {buffer, user_acc} = run_handler(handler, buffer <> <<ch::8>>, opts, user_acc)
    205 
    206     do_parse(
    207       {"", user_acc},
    208       opts,
    209       {"", acc <> buffer, state},
    210       handler
    211     )
    212   end
    213 
    214   defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, handler),
    215     do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, handler)
    216 
    217   def check_and_link("mailto:" <> _ = buffer, opts, _user_acc) do
    218     buffer
    219     |> is_email?()
    220     |> link_url(buffer, opts)
    221   end
    222 
    223   def check_and_link("xmpp:" <> _ = buffer, opts, _user_acc) do
    224     buffer
    225     |> is_email?()
    226     |> link_url(buffer, opts)
    227   end
    228 
    229   def check_and_link(buffer, opts, _user_acc) do
    230     prefixes = ["http://", "https://"] ++ (opts[:extra_prefixes] || [])
    231 
    232     buffer
    233     |> is_prefixed_url?(prefixes)
    234     |> link_url(buffer, opts)
    235   end
    236 
    237   def check_and_link_phone(buffer, opts, _user_acc) do
    238     buffer
    239     |> match_phone
    240     |> link_phone(buffer, opts)
    241   end
    242 
    243   def check_and_link_mention(buffer, opts, user_acc) do
    244     buffer
    245     |> match_mention
    246     |> link_mention(buffer, opts, user_acc)
    247   end
    248 
    249   def check_and_link_hashtag(buffer, opts, user_acc) do
    250     buffer
    251     |> match_hashtag
    252     |> link_hashtag(buffer, opts, user_acc)
    253   end
    254 
    255   # @doc false
    256   def is_url?(buffer, _) do
    257     is_url?(buffer)
    258   end
    259 
    260   def is_url?(buffer) do
    261     is_prefixed_url?(buffer, ["http://", "https://", "xmpp:", "mailto:"])
    262   end
    263 
    264   def is_prefixed_url?(buffer, prefixes) do
    265     Regex.match?(@valid_url, buffer) and String.starts_with?(buffer, prefixes)
    266   end
    267 
    268   def is_email?(buffer) do
    269     is_url?(buffer) and Regex.match?(@valid_email, buffer)
    270   end
    271 
    272   # Shimming
    273   def is_ip?(_buffer) do
    274     true
    275   end
    276 
    277   @doc false
    278   def match_phone(buffer) do
    279     case Regex.scan(@match_phone, buffer) do
    280       [] -> nil
    281       other -> other
    282     end
    283   end
    284 
    285   def match_mention(buffer) do
    286     case Regex.run(@match_mention, buffer) do
    287       [mention] -> mention
    288       _ -> nil
    289     end
    290   end
    291 
    292   def match_hashtag(buffer) do
    293     case Regex.run(@match_hashtag, buffer, capture: [:tag]) do
    294       [hashtag] -> hashtag
    295       _ -> nil
    296     end
    297   end
    298 
    299   def link_hashtag(nil, buffer, _, _user_acc), do: buffer
    300 
    301   def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do
    302     hashtag
    303     |> hashtag_handler.(buffer, opts, user_acc)
    304     |> maybe_update_buffer(hashtag, buffer)
    305   end
    306 
    307   def link_hashtag(hashtag, buffer, opts, _user_acc) do
    308     hashtag
    309     |> Builder.create_hashtag_link(buffer, opts)
    310     |> maybe_update_buffer(hashtag, buffer)
    311   end
    312 
    313   def link_mention(nil, buffer, _, user_acc), do: {buffer, user_acc}
    314 
    315   def link_mention(mention, buffer, %{mention_handler: mention_handler} = opts, user_acc) do
    316     mention
    317     |> mention_handler.(buffer, opts, user_acc)
    318     |> maybe_update_buffer(mention, buffer)
    319   end
    320 
    321   def link_mention(mention, buffer, opts, _user_acc) do
    322     mention
    323     |> Builder.create_mention_link(buffer, opts)
    324     |> maybe_update_buffer(mention, buffer)
    325   end
    326 
    327   defp maybe_update_buffer(out, match, buffer) when is_binary(out) do
    328     maybe_update_buffer({out, nil}, match, buffer)
    329   end
    330 
    331   defp maybe_update_buffer({out, user_acc}, match, buffer)
    332        when match != buffer and out != buffer do
    333     out = String.replace(buffer, match, out)
    334     {out, user_acc}
    335   end
    336 
    337   defp maybe_update_buffer(out, _match, _buffer), do: out
    338 
    339   def link_phone(nil, buffer, _), do: buffer
    340 
    341   def link_phone(list, buffer, opts) do
    342     Builder.create_phone_link(list, buffer, opts)
    343   end
    344 
    345   @doc false
    346   def link_url(true, buffer, opts) do
    347     Builder.create_link(buffer, opts)
    348   end
    349 
    350   def link_url(_, buffer, _opts), do: buffer
    351 
    352   @doc false
    353   def link_email(true, buffer, opts) do
    354     Builder.create_email_link(buffer, opts)
    355   end
    356 
    357   def link_email(_, buffer, _opts), do: buffer
    358 
    359   defp run_handler(handler, buffer, opts, user_acc) do
    360     case handler.(buffer, opts, user_acc) do
    361       {buffer, user_acc} -> {buffer, user_acc}
    362       buffer -> {buffer, user_acc}
    363     end
    364   end
    365 end