Do not parse html links inside html tags - auto_linker - AutoLinker-shim, based on https://git.pleroma.social/pleroma/auto_linker

commit: e5a8cf2b08edfe22f776d9c5f500029747daf1c8
parent 86afaf49e5eceaaa2b2b72f1e290eeb6236ca020
Author: Sergey Suprunenko <suprunenko.s@gmail.com>
Date:   Sat, 29 Aug 2020 23:29:07 +0200

Do not parse html links inside html tags

Diffstat:
M lib/linkify/parser.ex 15 +++++++--------
M test/parser_test.exs 14 ++++++++++++++

2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex
@@ -128,13 +128,8 @@ defmodule Linkify.Parser do
     do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}})
   end
 
-  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}),
-    do:
-      do_parse(
-        {text, user_acc},
-        opts,
-        {"", accumulate(acc, buffer, ">"), {:html, level}}
-      )
+  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}),
+    do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing})
 
   defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do
     do_parse({text, user_acc}, opts, {"", accumulate(acc, <<ch::8>>), {:attrs, level}})
@@ -194,7 +189,11 @@ defmodule Linkify.Parser do
     do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state})
 
   def check_and_link(:url, buffer, opts, _user_acc) do
-    str = strip_parens(buffer)
+    str =
+      buffer
+      |> String.split("<")
+      |> List.first()
+      |> strip_parens()
 
     if url?(str, opts) do
       case @match_url |> Regex.run(str, capture: [:url]) |> hd() do
diff --git a/test/parser_test.exs b/test/parser_test.exs
@@ -155,6 +155,20 @@ defmodule Linkify.ParserTest do
       assert parse(text, class: false, rel: false) == expected
     end
 
+    test "html links inside html" do
+      text = ~s(<p><a href="http://google.com">google.com</a></p>)
+      assert parse(text) == text
+
+      text = ~s(<span><a href="http://google.com">google.com</a></span>)
+      assert parse(text) == text
+
+      text = ~s(<h1><a href="http://google.com">google.com</a></h1>)
+      assert parse(text) == text
+
+      text = ~s(<li><a href="http://google.com">google.com</a></li>)
+      assert parse(text) == text
+    end
+
     test "do not link parens" do
       text = " foo (https://example.com/path/folder/), bar"

M	lib/linkify/parser.ex	15	+++++++--------
M	test/parser_test.exs	14	++++++++++++++