logo

auto_linker

AutoLinker-shim, based on https://git.pleroma.social/pleroma/auto_linker
git clone https://hacktivis.me/git/auto_linker.git
commit: 649fc9125daaef03abdba867f600ed398bd5c5b0
parent 26f5310379effc184e8e93c13b4f26c3d6528e62
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Tue, 17 Nov 2020 16:27:24 +0100

parser: Validate IPv6, IDN compatibility in email and mentions

Diffstat:

M lib/linkify/parser.ex | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M test/linkify_test.exs | 20 ++++++--------------
M test/parser_test.exs | 2 +-
3 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex @@ -9,17 +9,11 @@ defmodule Linkify.Parser do @match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u - @match_hostname ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u - - @match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" + @get_scheme_host ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u # @user # @user@example.com # credo:disable-for-next-line - @match_mention ~r/^(?:\W*)?(?<long>@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)|^(?:\W*)?(?<short>@[a-zA-Z\d_-]+)/u - - # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address - @match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u @@ -63,7 +57,7 @@ defmodule Linkify.Parser do ~s{Check out <a href="http://google.com">google.com</a>} """ - @types [:url, :email, :hashtag, :extra, :mention] + @types [:url, :hashtag, :extra, :mention, :email] def parse(input, opts \\ %{}) def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0) @@ -224,7 +218,11 @@ defmodule Linkify.Parser do end def email?(buffer, opts) do - valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer, opts) + # Note: In reality the local part can only be checked by the remote server + case Regex.run(~r/^(?<user>.*)@(?<host>[^@]+)$/, buffer, capture: [:user, :host]) do + [_user, hostname] -> valid_hostname?(hostname) && valid_tld?(hostname, opts) + _ -> false + end end defp valid_url?(url), do: !Regex.match?(@invalid_url, url) @@ -237,7 +235,7 @@ defmodule Linkify.Parser do Will skip validation and return `true` if `:validate_tld` set to 
`:no_scheme` and the url has a scheme. """ def valid_tld?(url, opts) do - [scheme, host] = Regex.run(@match_hostname, url, capture: [:scheme, :host]) + [scheme, host] = Regex.run(@get_scheme_host, url, capture: [:scheme, :host]) cond do opts[:validate_tld] == false -> @@ -256,13 +254,58 @@ defmodule Linkify.Parser do end end - def ip?(buffer), do: Regex.match?(@match_ip, buffer) + def safe_to_integer(string, base \\ 10) do + String.to_integer(string, base) + rescue + _ -> + nil + end + + def ip?(buffer) do + v4 = String.split(buffer, ".") + + v6 = + buffer + |> String.trim_leading("[") + |> String.trim_trailing("]") + |> String.split(":", trim: true) + + cond do + length(v4) == 4 -> + !Enum.any?(v4, fn x -> safe_to_integer(x, 10) not in 0..255 end) + + length(v6) in 1..8 -> + !Enum.any?(v4, fn x -> safe_to_integer(x, 16) not in 0..0xFFFF end) + + false -> + false + end + end + + # IDN-compatible, ported from musl-libc's is_valid_hostname() + def valid_hostname?(hostname) do + hostname + |> String.to_charlist() + |> Enum.any?(fn s -> + !(s >= 0x80 || s in 0x30..0x39 || s in 0x41..0x5A || s in 0x61..0x7A || s in '.-') + end) + |> Kernel.!() + end def match_mention(buffer) do - case Regex.run(@match_mention, buffer, capture: [:long, :short]) do - [mention, ""] -> mention - ["", mention] -> mention - _ -> nil + case Regex.run(~r/^@(?<user>[a-zA-Z\d_-]+)(@(?<host>[^@]+))?$/, buffer, + capture: [:user, :host] + ) do + [user, ""] -> + "@" <> user + + [user, hostname] -> + if valid_hostname?(hostname) && valid_tld?(hostname, []), + do: "@" <> user <> "@" <> hostname, + else: nil + + _ -> + nil end end diff --git a/test/linkify_test.exs b/test/linkify_test.exs @@ -244,7 +244,7 @@ defmodule LinkifyTest do end expected = - ~s(Hello again, <span class="h-card"><a href="#/user/user">@<span>@user</span></a></span>.&lt;script&gt;&lt;/script&gt;\nThis is on another :moominmamma: line. 
<a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>) + ~s(Hello again, @user.&lt;script&gt;&lt;/script&gt;\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>) assert Linkify.link(text, mention: true, @@ -377,22 +377,14 @@ defmodule LinkifyTest do text = "That's @user@example.com's server" - expected = - "That's <a href=\"https://example.com/user/user@example.com\">@user@example.com</a>'s server" - - assert Linkify.link(text, - mention: true, - mention_prefix: "https://example.com/user/" - ) == expected + assert Linkify.link(text, mention: true, mention_prefix: "https://example.com/user/") == + text end - test "mentions with symbols before them" do - text = "@@example hey! >@@test@example.com" - - expected = - "@<a href=\"/users/example\">@example</a> hey! >@<a href=\"/users/test@example.com\">@test@example.com</a>" + test "mentions with no word-separation before them" do + text = "@@example hey! >@@test@example.com idolm@ster" - assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected + assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text end test "invalid mentions" do diff --git a/test/parser_test.exs b/test/parser_test.exs @@ -305,6 +305,6 @@ defmodule Linkify.ParserTest do ] def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li", "guardian@33y6fjyhs3phzfjj.onion"] - def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock", "xmpp:lain@trashserver.net"] + def invalid_emails, do: ["rms[at]ai.mit.edu", "vc@cock"] def valid_custom_tld_emails, do: ["hi@company.null"] end