commit: a3ec8eb797cfa81df7c8f605ec4ed57b74294430
parent: 479dd343f4e563ff91215c8275f3b5c67e032850
Author: Egor <egor@kislitsyn.com>
Date: Tue, 9 Apr 2019 08:05:29 +0000
Merge branch 'improve-parsing' into 'master'
Improve Parser
See merge request pleroma/auto_linker!10
Diffstat:
5 files changed, 114 insertions(+), 42 deletions(-)
diff --git a/lib/auto_linker/builder.ex b/lib/auto_linker/builder.ex
@@ -82,7 +82,7 @@ defmodule AutoLinker.Builder do
defp truncate(url, len) when len < 3, do: url
defp truncate(url, len) do
- if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "..", else: url
+ if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "...", else: url
end
defp strip_prefix(url, true) do
diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex
@@ -5,31 +5,11 @@ defmodule AutoLinker.Parser do
alias AutoLinker.Builder
- @doc """
- Parse the given string, identifying items to link.
-
- Parses the string, replacing the matching urls and phone numbers with an html link.
-
- ## Examples
-
- iex> AutoLinker.Parser.parse("Check out google.com")
- ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
-
- iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
- ~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
-
- iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
- ~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
-
- iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
- ~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
- """
-
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
- @match_scheme ~r{^(?:\W*)?(?<url>(?:\W*https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
+ @match_scheme ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
@@ -64,6 +44,26 @@ defmodule AutoLinker.Parser do
@default_opts ~w(url)a
+ @doc """
+ Parse the given string, identifying items to link.
+
+ Parses the string, replacing the matching urls and phone numbers with an html link.
+
+ ## Examples
+
+ iex> AutoLinker.Parser.parse("Check out google.com")
+ ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
+
+ iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
+ ~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
+
+ iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
+ ~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
+
+ iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
+ ~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
+ """
+
def parse(input, opts \\ %{})
def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0)
def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))
@@ -154,20 +154,31 @@ defmodule AutoLinker.Parser do
defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
do: {acc, user_acc}
- defp do_parse({"", user_acc}, opts, {buffer, acc, _}, handler) do
- {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
- {acc <> buffer, user_acc}
- end
-
defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)
+ defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
+ do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)
+
+ defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
+ do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)
+
defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)
+ defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
+ do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)
+
+ defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
+ do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)
+
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)
+ defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
+ do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
+ end
+
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
do:
do_parse(
@@ -204,19 +215,8 @@ defmodule AutoLinker.Parser do
handler
)
- defp do_parse(
- {<<char::bytes-size(1), text::binary>>, user_acc},
- opts,
- {buffer, acc, {:open, level}},
- handler
- )
- when char in [" ", "\r", "\n"] do
- do_parse(
- {text, user_acc},
- opts,
- {"", acc <> buffer <> char, {:attrs, level}},
- handler
- )
+ defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
+ do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
end
# default cases where state is not important
diff --git a/test/auto_linker_test.exs b/test/auto_linker_test.exs
@@ -61,6 +61,24 @@ defmodule AutoLinkerTest do
new_window: false,
rel: custom_rel
) == expected
+
+ text = "google.com"
+
+ expected = "<a href=\"http://google.com\">google.com</a>"
+
+ custom_rel = fn _ -> nil end
+
+ assert AutoLinker.link(text,
+ class: false,
+ new_window: false,
+ rel: custom_rel
+ ) == expected
+ end
+
+ test "link_map/2" do
+ assert AutoLinker.link_map("google.com", []) ==
+ {"<a href=\"http://google.com\" class=\"auto-linker\" target=\"_blank\" rel=\"noopener noreferrer\">google.com</a>",
+ []}
end
describe "custom handlers" do
@@ -144,6 +162,22 @@ defmodule AutoLinkerTest do
) == expected
end
+ test "mentions inside html tags" do
+ text =
+ "<p><strong>hello world</strong></p>\n<p><`em>another @user__test and @user__test google.com paragraph</em></p>\n"
+
+ expected =
+ "<p><strong>hello world</strong></p>\n<p><`em>another <a href=\"u/user__test\">@user__test</a> and <a href=\"u/user__test\">@user__test</a> <a href=\"http://google.com\">google.com</a> paragraph</em></p>\n"
+
+ assert AutoLinker.link(text,
+ mention: true,
+ mention_prefix: "u/",
+ class: false,
+ rel: false,
+ new_window: false
+ ) == expected
+ end
+
test "metion @user@example.com" do
text = "hey @user@example.com"
diff --git a/test/builder_test.exs b/test/builder_test.exs
@@ -17,6 +17,16 @@ defmodule AutoLinker.BuilderTest do
"<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\" rel=\"me\">text</a>"
assert create_link("text", %{rel: "me"}) == expected
+
+ expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">t...</a>"
+
+ assert create_link("text", %{truncate: 3, rel: false}) == expected
+
+ expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">text</a>"
+ assert create_link("text", %{truncate: 2, rel: false}) == expected
+
+ expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">http://text</a>"
+ assert create_link("http://text", %{rel: false, strip_prefix: false}) == expected
end
test "create_markdown_links/2" do
@@ -52,9 +62,9 @@ defmodule AutoLinker.BuilderTest do
phrase = "my exten is x888. Call me."
expected =
- ~s'my exten is <a href="#" class="phone-number" data-phone="888">x888</a>. Call me.'
+ ~s'my exten is <a href="#" class="phone-number" data-phone="888" test=\"test\">x888</a>. Call me.'
- assert create_phone_link([["x888", ""]], phrase, []) == expected
+ assert create_phone_link([["x888", ""]], phrase, attributes: [test: "test"]) == expected
end
test "handles multiple links" do
diff --git a/test/parser_test.exs b/test/parser_test.exs
@@ -69,7 +69,24 @@ defmodule AutoLinker.ParserTest do
assert parse(text) == text
end
+ test "does not link inside `<pre>` and `<code>`" do
+ text = "<pre>google.com</pre>"
+ assert parse(text) == text
+
+ text = "<code>google.com</code>"
+ assert parse(text) == text
+
+ text = "<pre><code>google.com</code></pre>"
+ assert parse(text) == text
+ end
+
test "links url inside html" do
+ text = "<div>google.com</div>"
+
+ expected = "<div><a href=\"http://google.com\">google.com</a></div>"
+
+ assert parse(text, class: false, rel: false, new_window: false, phone: false) == expected
+
text = "Check out <div class='section'>google.com</div>"
expected =
@@ -78,10 +95,21 @@ defmodule AutoLinker.ParserTest do
assert parse(text, class: false, rel: false, new_window: false) == expected
end
+ test "links url inside nested html" do
+ text = "<p><strong>google.com</strong></p>"
+ expected = "<p><strong><a href=\"http://google.com\">google.com</a></strong></p>"
+ assert parse(text, class: false, rel: false, new_window: false) == expected
+ end
+
test "excludes html with specified class" do
text = "```Check out <div class='section'>google.com</div>```"
assert parse(text, exclude_patterns: ["```"]) == text
end
+
+ test "do not link urls" do
+ text = "google.com"
+ assert parse(text, url: false, phone: true) == text
+ end
end
def valid_number?([list], number) do