logo

auto_linker

AutoLinker-shim, based on https://git.pleroma.social/pleroma/auto_linker
commit: 479acfb82d3e73ded82621b6e153d747a02aa1b8
parent: 8ec0c74b3a6e0309f4e1c2014d72ab00a0e722e0
Author: Egor Kislitsyn <egor@kislitsyn.com>
Date:   Fri,  8 Feb 2019 13:45:11 +0700

add email, @mentions, #hashtags and extra schemes

Diffstat:

M lib/auto_linker/builder.ex | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/auto_linker/parser.ex | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M test/auto_linker_test.exs | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 403 insertions(+), 7 deletions(-)

diff --git a/lib/auto_linker/builder.ex b/lib/auto_linker/builder.ex @@ -112,6 +112,64 @@ defmodule AutoLinker.Builder do }>' end + def create_mention_link("@" <> name, _buffer, opts) do + mention_prefix = opts[:mention_prefix] + + url = mention_prefix <> name + + [] + |> build_attrs(url, opts, :rel) + |> build_attrs(url, opts, :target) + |> build_attrs(url, opts, :class) + |> build_attrs(url, opts, :scheme) + |> format_mention(name, opts) + end + + def create_hashtag_link(tag, _buffer, opts) do + hashtag_prefix = opts[:hashtag_prefix] + + url = hashtag_prefix <> tag + + [] + |> build_attrs(url, opts, :rel) + |> build_attrs(url, opts, :target) + |> build_attrs(url, opts, :class) + |> build_attrs(url, opts, :scheme) + |> format_hashtag(tag, opts) + end + + def create_email_link(email, opts) do + [] + |> build_attrs(email, opts, :class) + |> format_email(email, opts) + end + + def create_extra_link(uri, opts) do + [] + |> build_attrs(uri, opts, :class) + |> format_extra(uri, opts) + end + + def format_mention(attrs, name, _opts) do + attrs = format_attrs(attrs) + "<a #{attrs}>@" <> name <> "</a>" + end + + def format_hashtag(attrs, tag, _opts) do + attrs = format_attrs(attrs) + "<a #{attrs}>#" <> tag <> "</a>" + end + + def format_email(attrs, email, _opts) do + attrs = format_attrs(attrs) + "<a href='mailto:#{email}' #{attrs}>#{email}</a>" + end + + def format_extra(attrs, uri, _opts) do + attrs = format_attrs(attrs) + "<a href='#{uri}' #{attrs}>#{uri}</a>" + end + defp format_attributes(attrs) do Enum.reduce(attrs, "", fn {name, value}, acc -> acc <> ~s' #{name}="#{value}"' diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex @@ -28,19 +28,43 @@ defmodule AutoLinker.Parser do # @invalid_url ~r/\.\.+/ @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/ - @match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$} - @match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$} + @match_url 
~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$} + + @match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$} @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))" - @match_hostname ~r{^(?:https?:\/\/)?(?:[^@\n]+@)?(?<host>[^:#~\/\n?]+)} + @match_hostname ~r{^(?:https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)} @match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" - @default_opts ~w(url)a + # @user + # @user@example.com + @match_mention ~r/^@[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@?[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*/u + + # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address + @match_email ~r/^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/u + + @match_hashtag ~r/^\#(?<tag>\w+)/u + + @prefix_extra [ + "magnet:?", + "dweb://", + "dat://", + "gopher://", + "ipfs://", + "ipns://", + "irc://", + "ircs://", + "irc6://", + "mumble://", + "ssb://" + ] @tlds "./priv/tlds.txt" |> File.read!() |> String.trim() |> String.split("\n") + @default_opts ~w(url)a + def parse(text, opts \\ %{}) def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{})) @@ -75,12 +99,30 @@ defmodule AutoLinker.Parser do |> do_parse(Map.delete(opts, :phone)) end + defp do_parse(text, %{mention: true} = opts) do + text + |> do_parse(false, opts, {"", "", :parsing}, &check_and_link_mention/3) + |> do_parse(Map.delete(opts, :mention)) + end + + defp do_parse(text, %{extra: true} = opts) do + text + |> do_parse(false, opts, {"", "", :parsing}, &check_and_link_extra/3) + |> do_parse(Map.delete(opts, :extra)) + 
end + defp do_parse(text, %{markdown: true} = opts) do text |> Builder.create_markdown_links(opts) |> do_parse(Map.delete(opts, :markdown)) end + defp do_parse(text, %{email: true} = opts) do + text + |> do_parse(false, opts, {"", "", :parsing}, &check_and_link_email/3) + |> do_parse(Map.delete(opts, :email)) + end + defp do_parse(text, %{url: _} = opts) do if (exclude = Map.get(opts, :exclude_pattern, false)) && String.starts_with?(text, exclude) do text @@ -90,6 +132,12 @@ defmodule AutoLinker.Parser do |> do_parse(Map.delete(opts, :url)) end + defp do_parse(text, %{hashtag: true} = opts) do + text + |> do_parse(false, opts, {"", "", :parsing}, &check_and_link_hashtag/3) + |> do_parse(Map.delete(opts, :hashtag)) + end + defp do_parse(text, _), do: text defp do_parse("", _scheme, _opts, {"", acc, _}, _handler), @@ -110,8 +158,9 @@ defmodule AutoLinker.Parser do defp do_parse(">" <> text, scheme, opts, {buffer, acc, {:attrs, level}}, handler), do: do_parse(text, scheme, opts, {"", acc <> buffer <> ">", {:html, level}}, handler) - defp do_parse(<<ch::8>> <> text, scheme, opts, {"", acc, {:attrs, level}}, handler), - do: do_parse(text, scheme, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler) + defp do_parse(<<ch::8>> <> text, scheme, opts, {"", acc, {:attrs, level}}, handler) do + do_parse(text, scheme, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler) + end defp do_parse("</" <> text, scheme, opts, {buffer, acc, {:html, level}}, handler), do: @@ -178,13 +227,43 @@ defmodule AutoLinker.Parser do |> link_url(buffer, opts) end + def check_and_link_email(buffer, _, opts) do + buffer + |> is_email? 
+ |> link_email(buffer, opts) + end + def check_and_link_phone(buffer, _, opts) do buffer |> match_phone |> link_phone(buffer, opts) end - @doc false + def check_and_link_mention(buffer, _, opts) do + buffer + |> match_mention + |> link_mention(buffer, opts) + end + + def check_and_link_hashtag(buffer, _, opts) do + buffer + |> match_hashtag + |> link_hashtag(buffer, opts) + end + + def check_and_link_extra("xmpp:" <> handle, _, opts) do + handle + |> is_email? + |> link_extra("xmpp:" <> handle, opts) + end + + def check_and_link_extra(buffer, _, opts) do + buffer + |> String.starts_with?(@prefix_extra) + |> link_extra(buffer, opts) + end + + # @doc false def is_url?(buffer, true) do if Regex.match?(@invalid_url, buffer) do false @@ -201,6 +280,14 @@ defmodule AutoLinker.Parser do end end + def is_email?(buffer) do + if Regex.match?(@invalid_url, buffer) do + false + else + Regex.match?(@match_email, buffer) |> is_valid_tld?(buffer) + end + end + def is_valid_tld?(true, buffer) do [host] = Regex.run(@match_hostname, buffer, capture: [:host]) @@ -227,6 +314,37 @@ defmodule AutoLinker.Parser do end end + def match_mention(buffer) do + case Regex.run(@match_mention, buffer) do + [mention] -> mention + _ -> nil + end + end + + def match_hashtag(buffer) do + case Regex.run(@match_hashtag, buffer, capture: [:tag]) do + [hashtag] -> hashtag + _ -> nil + end + end + + def link_hashtag(nil, buffer, _), do: buffer + + def link_hashtag(hashtag, buffer, opts) do + Builder.create_hashtag_link(hashtag, buffer, opts) + end + + def link_mention(nil, buffer, _), do: buffer + + def link_mention(mention, _buffer, %{mention_formatter: mention_formatter} = opts) do + {buffer, _} = mention_formatter.(mention, opts) + buffer + end + + def link_mention(mention, buffer, opts) do + Builder.create_mention_link(mention, buffer, opts) + end + def link_phone(nil, buffer, _), do: buffer def link_phone(list, buffer, opts) do @@ -239,4 +357,17 @@ defmodule AutoLinker.Parser do end def link_url(_, 
buffer, _opts), do: buffer + + @doc false + def link_email(true, buffer, opts) do + Builder.create_email_link(buffer, opts) + end + + def link_email(_, buffer, _opts), do: buffer + + def link_extra(true, buffer, opts) do + Builder.create_extra_link(buffer, opts) + end + + def link_extra(_, buffer, _opts), do: buffer end diff --git a/test/auto_linker_test.exs b/test/auto_linker_test.exs @@ -28,6 +28,213 @@ defmodule AutoLinkerTest do " <a href='a.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>ab</a>" end + test "all kinds of links" do + text = + "hello @user google.com https://ddg.com 888 888-8888 #tag user@email.com [google.com](http://google.com) irc:///mIRC" + + expected = + "hello <a href='https://example.com/user/user'>@user</a> <a href='http://google.com'>google.com</a> <a href='https://ddg.com'>ddg.com</a> <a href=\"#\" class=\"phone-number\" data-phone=\"8888888888\">888 888-8888</a> <a href='https://example.com/tag/tag'>#tag</a> <a href='mailto:user@email.com' >user@email.com</a> <a href='http://google.com'>google.com</a> <a href='irc:///mIRC' >irc:///mIRC</a>" + + assert AutoLinker.link(text, + phone: true, + markdown: true, + email: true, + mention: true, + mention_prefix: "https://example.com/user/", + hashtag: true, + hashtag_prefix: "https://example.com/tag/", + scheme: true, + extra: true, + class: false, + new_window: false, + rel: false + ) == expected + end + + describe "mentions" do + test "simple mentions" do + expected = + ~s{hello <a href='https://example.com/user/user' class='auto-linker' target='_blank' rel='noopener noreferrer'>@user</a> and <a href='https://example.com/user/anotherUser' class='auto-linker' target='_blank' rel='noopener noreferrer'>@anotherUser</a>} + + assert AutoLinker.link("hello @user and @anotherUser", + mention: true, + mention_prefix: "https://example.com/user/" + ) == expected + end + + test "metion @user@example.com" do + text = "hey @user@example.com" + + expected = + "hey <a 
href='https://example.com/user/user@example.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>@user@example.com</a>" + + assert AutoLinker.link(text, + mention: true, + mention_prefix: "https://example.com/user/" + ) == expected + end + + test "skip if starts with @@" do + text = "hello @@user and @anotherUser" + + expected = + "hello @@user and <a href='https://example.com/user/anotherUser' class='auto-linker' target='_blank' rel='noopener noreferrer'>@anotherUser</a>" + + assert AutoLinker.link(text, + mention: true, + mention_prefix: "https://example.com/user/" + ) == expected + end + end + + describe "hashtag links" do + test "hashtag" do + expected = + "one <a href='https://example.com/tag/two' class='auto-linker' target='_blank' rel='noopener noreferrer'>#two</a> three <a href='https://example.com/tag/four' class='auto-linker' target='_blank' rel='noopener noreferrer'>#four</a>" + + assert AutoLinker.link("one #two three #four", + hashtag: true, + hashtag_prefix: "https://example.com/tag/" + ) == expected + end + + test "do not turn urls with hashes into hashtags" do + text = "google.com#test #test google.com/#test #tag" + + expected = + "<a href='http://google.com#test'>google.com#test</a> <a href='https://example.com/tag/test'>#test</a> <a href='http://google.com/#test'>google.com/#test</a> <a href='https://example.com/tag/tag'>#tag</a>" + + assert AutoLinker.link(text, + scheme: true, + hashtag: true, + class: false, + new_window: false, + rel: false, + hashtag_prefix: "https://example.com/tag/" + ) == expected + end + + test "works with non-latin characters" do + text = "#漢字 #は #тест #ทดสอบ" + + expected = + "<a href='https://example.com/tag/漢字'>#漢字</a> <a href='https://example.com/tag/は'>#は</a> <a href='https://example.com/tag/тест'>#тест</a> <a href='https://example.com/tag/ทดสอบ'>#ทดสอบ</a>" + + assert AutoLinker.link(text, + scheme: true, + class: false, + new_window: false, + rel: false, + hashtag: true, + hashtag_prefix: 
"https://example.com/tag/" + ) == expected + end + end + + describe "links" do + test "turning urls into links" do + text = "Hey, check out http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ." + + expected = + "Hey, check out <a href='http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla' class='auto-linker' target='_blank' rel='noopener noreferrer'>youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla</a> ." + + assert AutoLinker.link(text, scheme: true) == expected + + # no scheme + text = "Hey, check out www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ." + assert AutoLinker.link(text, scheme: true) == expected + end + + test "hostname/@user" do + text = "https://example.com/@user" + + expected = + "<a href='https://example.com/@user' class='auto-linker' target='_blank' rel='noopener noreferrer'>example.com/@user</a>" + + assert AutoLinker.link(text, scheme: true) == expected + + text = "https://example.com:4000/@user" + + expected = + "<a href='https://example.com:4000/@user' class='auto-linker' target='_blank' rel='noopener noreferrer'>example.com:4000/@user</a>" + + assert AutoLinker.link(text, scheme: true) == expected + + text = "https://example.com:4000/@user" + + expected = + "<a href='https://example.com:4000/@user' class='auto-linker' target='_blank' rel='noopener noreferrer'>example.com:4000/@user</a>" + + assert AutoLinker.link(text, scheme: true) == expected + + text = "@username" + expected = "@username" + assert AutoLinker.link(text, scheme: true) == expected + + text = "http://www.cs.vu.nl/~ast/intel/" + + expected = + "<a href='http://www.cs.vu.nl/~ast/intel/' class='auto-linker' target='_blank' rel='noopener noreferrer'>cs.vu.nl/~ast/intel/</a>" + + assert AutoLinker.link(text, scheme: true) == expected + + text = "https://forum.zdoom.org/viewtopic.php?f=44&t=57087" + + expected = + "<a href='https://forum.zdoom.org/viewtopic.php?f=44&t=57087' class='auto-linker' target='_blank' rel='noopener 
noreferrer'>forum.zdoom.org/viewtopic.php?f=44&t=57087</a>" + + assert AutoLinker.link(text, scheme: true) == expected + + text = "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul" + + expected = + "<a href='https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul' class='auto-linker' target='_blank' rel='noopener noreferrer'>en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul</a>" + + assert AutoLinker.link(text, scheme: true) == expected + + text = "https://en.wikipedia.org/wiki/Duff's_device" + + expected = + "<a href='https://en.wikipedia.org/wiki/Duff's_device' class='auto-linker' target='_blank' rel='noopener noreferrer'>en.wikipedia.org/wiki/Duff's_device</a>" + + assert AutoLinker.link(text, scheme: true) == expected + end + end + + describe "non http links" do + test "xmpp" do + text = "xmpp:user@example.com" + expected = "<a href='xmpp:user@example.com' class='auto-linker'>xmpp:user@example.com</a>" + assert AutoLinker.link(text, extra: true) == expected + end + + test "email" do + text = "user@example.com" + expected = "<a href='mailto:user@example.com' class='auto-linker'>user@example.com</a>" + assert AutoLinker.link(text, email: true) == expected + end + + test "magnet" do + text = + "magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce" + + expected = + "<a href='magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce' class='auto-linker'>magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce</a>" + + assert AutoLinker.link(text, extra: 
true) == expected + end + + test "dweb" do + text = + "dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt" + + expected = + "<a href='dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt' class='auto-linker'>dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt</a>" + + assert AutoLinker.link(text, extra: true) == expected + end + end + describe "TLDs" do test "parse with scheme" do text = "https://google.com"