commit: e5a8cf2b08edfe22f776d9c5f500029747daf1c8
parent 86afaf49e5eceaaa2b2b72f1e290eeb6236ca020
Author: Sergey Suprunenko <suprunenko.s@gmail.com>
Date: Sat, 29 Aug 2020 23:29:07 +0200
Do not parse HTML links inside HTML tags
Diffstat:
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex
@@ -128,13 +128,8 @@ defmodule Linkify.Parser do
do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}})
end
- defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}),
- do:
- do_parse(
- {text, user_acc},
- opts,
- {"", accumulate(acc, buffer, ">"), {:html, level}}
- )
+ defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}),
+ do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing})
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do
do_parse({text, user_acc}, opts, {"", accumulate(acc, <<ch::8>>), {:attrs, level}})
@@ -194,7 +189,11 @@ defmodule Linkify.Parser do
do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state})
def check_and_link(:url, buffer, opts, _user_acc) do
- str = strip_parens(buffer)
+ str =
+ buffer
+ |> String.split("<")
+ |> List.first()
+ |> strip_parens()
if url?(str, opts) do
case @match_url |> Regex.run(str, capture: [:url]) |> hd() do
diff --git a/test/parser_test.exs b/test/parser_test.exs
@@ -155,6 +155,20 @@ defmodule Linkify.ParserTest do
assert parse(text, class: false, rel: false) == expected
end
+ test "html links inside html" do
+ text = ~s(<p><a href="http://google.com">google.com</a></p>)
+ assert parse(text) == text
+
+ text = ~s(<span><a href="http://google.com">google.com</a></span>)
+ assert parse(text) == text
+
+ text = ~s(<h1><a href="http://google.com">google.com</a></h1>)
+ assert parse(text) == text
+
+ text = ~s(<li><a href="http://google.com">google.com</a></li>)
+ assert parse(text) == text
+ end
+
test "do not link parens" do
text = " foo (https://example.com/path/folder/), bar"