logo

auto_linker

AutoLinker-shim, based on https://git.pleroma.social/pleroma/auto_linker
commit: a3ec8eb797cfa81df7c8f605ec4ed57b74294430
parent: 479dd343f4e563ff91215c8275f3b5c67e032850
Author: Egor <egor@kislitsyn.com>
Date:   Tue,  9 Apr 2019 08:05:29 +0000

Merge branch 'improve-parsing' into 'master'

Improve Parser

See merge request pleroma/auto_linker!10

Diffstat:

M lib/auto_linker/builder.ex | 2 +-
M lib/auto_linker/parser.ex | 78 +++++++++++++++++++++++++++++++++++++++---------------------------------------
M test/auto_linker_test.exs | 34 ++++++++++++++++++++++++++++++++++
M test/builder_test.exs | 14 ++++++++++++--
M test/parser_test.exs | 28 ++++++++++++++++++++++++++++
5 files changed, 114 insertions(+), 42 deletions(-)

diff --git a/lib/auto_linker/builder.ex b/lib/auto_linker/builder.ex @@ -82,7 +82,7 @@ defmodule AutoLinker.Builder do defp truncate(url, len) when len < 3, do: url defp truncate(url, len) do - if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "..", else: url + if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "...", else: url end defp strip_prefix(url, true) do diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex @@ -5,31 +5,11 @@ defmodule AutoLinker.Parser do alias AutoLinker.Builder - @doc """ - Parse the given string, identifying items to link. - - Parses the string, replacing the matching urls and phone numbers with an html link. - - ## Examples - - iex> AutoLinker.Parser.parse("Check out google.com") - ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>} - - iex> AutoLinker.Parser.parse("call me at x9999", phone: true) - ~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>} - - iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true) - ~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>} - - iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true) - ~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>} - """ - @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/ @match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$} - @match_scheme ~r{^(?:\W*)?(?<url>(?:\W*https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u + @match_scheme ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))" @@ -64,6 +44,26 @@ 
defmodule AutoLinker.Parser do @default_opts ~w(url)a + @doc """ + Parse the given string, identifying items to link. + + Parses the string, replacing the matching urls and phone numbers with an html link. + + ## Examples + + iex> AutoLinker.Parser.parse("Check out google.com") + ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>} + + iex> AutoLinker.Parser.parse("call me at x9999", phone: true) + ~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>} + + iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true) + ~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>} + + iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true) + ~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>} + """ + def parse(input, opts \\ %{}) def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0) def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{})) @@ -154,20 +154,31 @@ defmodule AutoLinker.Parser do defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler), do: {acc, user_acc} - defp do_parse({"", user_acc}, opts, {buffer, acc, _}, handler) do - {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc) - {acc <> buffer, user_acc} - end - defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler), do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler) + defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler), + do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler) + + defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler), + do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler) + defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler), do: 
do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler) + defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler), + do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler) + + defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler), + do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler) + defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler), do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler) + defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do + do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler) + end + defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler), do: do_parse( @@ -204,19 +215,8 @@ defmodule AutoLinker.Parser do handler ) - defp do_parse( - {<<char::bytes-size(1), text::binary>>, user_acc}, - opts, - {buffer, acc, {:open, level}}, - handler - ) - when char in [" ", "\r", "\n"] do - do_parse( - {text, user_acc}, - opts, - {"", acc <> buffer <> char, {:attrs, level}}, - handler - ) + defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do + do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler) end # default cases where state is not important diff --git a/test/auto_linker_test.exs b/test/auto_linker_test.exs @@ -61,6 +61,24 @@ defmodule AutoLinkerTest do new_window: false, rel: custom_rel ) == expected + + text = "google.com" + + expected = "<a href=\"http://google.com\">google.com</a>" + + custom_rel = fn _ -> nil end + + assert AutoLinker.link(text, + class: false, + new_window: false, + rel: custom_rel + ) == expected + end + + test "link_map/2" do + assert AutoLinker.link_map("google.com", []) == + {"<a href=\"http://google.com\" class=\"auto-linker\" target=\"_blank\" rel=\"noopener noreferrer\">google.com</a>", + []} end describe 
"custom handlers" do @@ -144,6 +162,22 @@ defmodule AutoLinkerTest do ) == expected end + test "mentions inside html tags" do + text = + "<p><strong>hello world</strong></p>\n<p><`em>another @user__test and @user__test google.com paragraph</em></p>\n" + + expected = + "<p><strong>hello world</strong></p>\n<p><`em>another <a href=\"u/user__test\">@user__test</a> and <a href=\"u/user__test\">@user__test</a> <a href=\"http://google.com\">google.com</a> paragraph</em></p>\n" + + assert AutoLinker.link(text, + mention: true, + mention_prefix: "u/", + class: false, + rel: false, + new_window: false + ) == expected + end + test "metion @user@example.com" do text = "hey @user@example.com" diff --git a/test/builder_test.exs b/test/builder_test.exs @@ -17,6 +17,16 @@ defmodule AutoLinker.BuilderTest do "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\" rel=\"me\">text</a>" assert create_link("text", %{rel: "me"}) == expected + + expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">t...</a>" + + assert create_link("text", %{truncate: 3, rel: false}) == expected + + expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">text</a>" + assert create_link("text", %{truncate: 2, rel: false}) == expected + + expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">http://text</a>" + assert create_link("http://text", %{rel: false, strip_prefix: false}) == expected end test "create_markdown_links/2" do @@ -52,9 +62,9 @@ defmodule AutoLinker.BuilderTest do phrase = "my exten is x888. Call me." expected = - ~s'my exten is <a href="#" class="phone-number" data-phone="888">x888</a>. Call me.' + ~s'my exten is <a href="#" class="phone-number" data-phone="888" test=\"test\">x888</a>. Call me.' 
- assert create_phone_link([["x888", ""]], phrase, []) == expected + assert create_phone_link([["x888", ""]], phrase, attributes: [test: "test"]) == expected end test "handles multiple links" do diff --git a/test/parser_test.exs b/test/parser_test.exs @@ -69,7 +69,24 @@ defmodule AutoLinker.ParserTest do assert parse(text) == text end + test "does not link inside `<pre>` and `<code>`" do + text = "<pre>google.com</pre>" + assert parse(text) == text + + text = "<code>google.com</code>" + assert parse(text) == text + + text = "<pre><code>google.com</code></pre>" + assert parse(text) == text + end + test "links url inside html" do + text = "<div>google.com</div>" + + expected = "<div><a href=\"http://google.com\">google.com</a></div>" + + assert parse(text, class: false, rel: false, new_window: false, phone: false) == expected + text = "Check out <div class='section'>google.com</div>" expected = @@ -78,10 +95,21 @@ defmodule AutoLinker.ParserTest do assert parse(text, class: false, rel: false, new_window: false) == expected end + test "links url inside nested html" do + text = "<p><strong>google.com</strong></p>" + expected = "<p><strong><a href=\"http://google.com\">google.com</a></strong></p>" + assert parse(text, class: false, rel: false, new_window: false) == expected + end + test "excludes html with specified class" do text = "```Check out <div class='section'>google.com</div>```" assert parse(text, exclude_patterns: ["```"]) == text end + + test "do not link urls" do + text = "google.com" + assert parse(text, url: false, phone: true) == text + end end def valid_number?([list], number) do