logo

auto_linker

AutoLinker-shim, based on https://git.pleroma.social/pleroma/auto_linker

git clone https://hacktivis.me/git/auto_linker.git

parser_test.exs (8178B)


  # Unit tests for `Linkify.Parser`: the `url?/2` and `email?/2` predicates and
  # the `parse/1,2` entry point. The URL/email fixtures live at the bottom of
  # the module as plain public functions so every test can share them.
  defmodule Linkify.ParserTest do
    use ExUnit.Case, async: true
    doctest Linkify.Parser

    import Linkify.Parser

    describe "url?/2" do
      test "valid scheme true" do
        valid_scheme_urls()
        |> Enum.each(fn url ->
          assert url?(url, scheme: true, validate_tld: true)
        end)
      end

      test "invalid scheme true" do
        invalid_scheme_urls()
        |> Enum.each(fn url ->
          refute url?(url, scheme: true, validate_tld: true)
        end)
      end

      test "valid scheme false" do
        valid_non_scheme_urls()
        |> Enum.each(fn url ->
          # Valid scheme-less URLs must be accepted. This used to be `refute`,
          # which made the test indistinguishable from "invalid scheme false"
          # below (same options, opposite fixtures, same expectation).
          assert url?(url, scheme: false, validate_tld: true)
        end)
      end

      test "invalid scheme false" do
        invalid_non_scheme_urls()
        |> Enum.each(fn url ->
          refute url?(url, scheme: false, validate_tld: true)
        end)
      end

      # The next three tests run the same custom-TLD fixtures through every
      # `validate_tld` value exercised here (true, false, :no_scheme) and expect
      # acceptance each time when `scheme: true`.
      test "does not checks the tld for url with a scheme when validate_tld: true" do
        custom_tld_scheme_urls()
        |> Enum.each(fn url ->
          assert url?(url, scheme: true, validate_tld: true)
        end)
      end

      test "does not check the tld for url with a scheme when validate_tld: false" do
        custom_tld_scheme_urls()
        |> Enum.each(fn url ->
          assert url?(url, scheme: true, validate_tld: false)
        end)
      end

      test "does not check the tld for url with a scheme when validate_tld: :no_scheme" do
        custom_tld_scheme_urls()
        |> Enum.each(fn url ->
          assert url?(url, scheme: true, validate_tld: :no_scheme)
        end)
      end

      # Scheme-less custom-TLD hosts are expected to be rejected both with
      # `validate_tld: true` and with `validate_tld: :no_scheme`.
      test "does not links for scheme-less urls" do
        custom_tld_non_scheme_urls()
        |> Enum.each(fn url ->
          refute url?(url, scheme: false, validate_tld: true)
        end)

        custom_tld_non_scheme_urls()
        |> Enum.each(fn url ->
          refute url?(url, scheme: false, validate_tld: :no_scheme)
        end)
      end
    end

    describe "email?" do
      test "identifies valid emails" do
        valid_emails()
        |> Enum.each(fn email ->
          assert email?(email, [])
        end)
      end

      test "identifies invalid emails" do
        invalid_emails()
        |> Enum.each(fn email ->
          refute email?(email, [])
        end)
      end
    end

    describe "parse" do
      test "handle line breakes" do
        text = "http://google.com\r\nssss"
        expected = "<a href=\"http://google.com\">http://google.com</a>\r\nssss"

        assert parse(text) == expected
      end

      # A `<` or `>` directly next to a URL (as part of a tag or on its own)
      # must stay outside the generated anchor.
      test "handle angle bracket in the end" do
        text = "http://google.com <br>"
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a> <br>"

        text = "http://google.com<br>hey"
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a><br>hey"

        text = "hey<br>http://google.com"
        assert parse(text) == "hey<br><a href=\"http://google.com\">http://google.com</a>"

        text = "<br />http://google.com"
        assert parse(text) == "<br /><a href=\"http://google.com\">http://google.com</a>"

        text = "http://google.com<"
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a><"

        text = "http://google.com>"
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a>>"
      end

      # URL-looking text inside tag attributes or existing anchors is left alone.
      test "does not link attributes" do
        text = "Check out <a href='google.com'>google</a>"
        assert parse(text) == text

        text = "Check out <img src='google.com' alt='google.com'/>"
        assert parse(text) == text

        text = "Check out <span><img src='google.com' alt='google.com'/></span>"
        assert parse(text) == text
      end

      test "does not link inside `<pre>` and `<code>`" do
        text = "<pre>google.com</pre>"
        assert parse(text) == text

        text = "<code>google.com</code>"
        assert parse(text) == text

        text = "<pre><code>google.com</code></pre>"
        assert parse(text) == text
      end

      test "links url inside html" do
        text = "<div>http://google.com</div>"
        expected = "<div><a href=\"http://google.com\">http://google.com</a></div>"

        assert parse(text, class: false, rel: false) == expected

        text = "Check out <div class='section'>http://google.com</div>"

        expected =
          "Check out <div class='section'><a href=\"http://google.com\">http://google.com</a></div>"

        assert parse(text, class: false, rel: false) == expected
      end

      test "links url inside nested html" do
        text = "<p><strong>http://google.com</strong></p>"
        expected = "<p><strong><a href=\"http://google.com\">http://google.com</a></strong></p>"

        assert parse(text, class: false, rel: false) == expected
      end

      # Already-linked text must pass through unchanged, whatever the wrapper tag.
      test "html links inside html" do
        text = ~s(<p><a href="http://google.com">google.com</a></p>)
        assert parse(text) == text

        text = ~s(<span><a href="http://google.com">google.com</a></span>)
        assert parse(text) == text

        text = ~s(<h1><a href="http://google.com">google.com</a></h1>)
        assert parse(text) == text

        text = ~s(<li><a href="http://google.com">google.com</a></li>)
        assert parse(text) == text
      end

      test "do not link parens" do
        text = " foo (https://example.com/path/folder/), bar"

        expected =
          " foo (<a href=\"https://example.com/path/folder/\">https://example.com/path/folder/</a>), bar"

        assert parse(text, class: false, rel: false, scheme: true) == expected
      end

      test "do not link punctuation marks in the end" do
        text = "http://google.com."
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a>."

        text = "http://google.com;"
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a>;"

        text = "http://google.com:"
        assert parse(text) == "<a href=\"http://google.com\">http://google.com</a>:"

        text = "hack http://google.com, please"
        assert parse(text) == "hack <a href=\"http://google.com\">http://google.com</a>, please"

        text = "(check out http://google.com)"
        assert parse(text) == "(check out <a href=\"http://google.com\">http://google.com</a>)"
      end

      test "do not link urls" do
        text = "google.com"
        assert parse(text, url: false) == text
      end

      test "does not link `:test.test`" do
        text = ":test.test"

        assert parse(text, %{
                 scheme: true,
                 extra: true,
                 class: false,
                 strip_prefix: false,
                 new_window: false,
                 rel: false
               }) == text
      end
    end

    # Helper: when the first argument is a one-element list wrapping another
    # list, asserts that the inner list's last element equals `number`; any
    # other shape returns false. Not called by any test in this module.
    def valid_number?([list], number) do
      assert List.last(list) == number
    end

    def valid_number?(_, _), do: false

    # --- Fixtures -----------------------------------------------------------

    # URLs with an explicit scheme that `url?/2` is expected to accept.
    def valid_scheme_urls,
      do: [
        "https://www.example.com",
        "http://www2.example.com",
        "http://home.example-site.com",
        "http://blog.example.com",
        "http://www.example.com/product",
        "http://www.example.com/products?id=1&page=2",
        "http://www.example.com#up",
        "http://255.255.255.255",
        "http://www.site.com:8008"
      ]

    # Strings with a scheme that `url?/2` is expected to reject.
    def invalid_scheme_urls,
      do: [
        "http://invalid.com/perl.cgi?key= | http://web-site.com/cgi-bin/perl.cgi?key1=value1&key2"
      ]

    # Scheme-less hosts/addresses that `url?/2` is expected to accept.
    def valid_non_scheme_urls,
      do: [
        "www.example.com",
        "www2.example.com",
        "www.example.com:2000",
        "www.example.com?abc=1",
        "example.example-site.com",
        "example.com",
        "example.ca",
        "example.tv",
        "example.com:999?one=one",
        "255.255.255.255",
        "255.255.255.255:3000?one=1&two=2"
      ]

    # Scheme-less strings that `url?/2` is expected to reject.
    def invalid_non_scheme_urls,
      do: [
        "invalid.com/perl.cgi?key= | web-site.com/cgi-bin/perl.cgi?key1=value1&key2",
        "invalid.",
        "hi..there",
        "555.555.5555"
      ]

    # URLs with a scheme and a non-standard TLD.
    def custom_tld_scheme_urls,
      do: [
        "http://whatever.null/",
        "https://example.o/index.html",
        "http://pleroma.i2p/test",
        "http://misskey.loki"
      ]

    # The same non-standard-TLD hosts without a scheme.
    def custom_tld_non_scheme_urls,
      do: [
        "whatever.null/",
        "example.o/index.html",
        "pleroma.i2p/test",
        "misskey.loki"
      ]

    def valid_emails, do: ["rms@ai.mit.edu", "vc@cock.li", "guardian@33y6fjyhs3phzfjj.onion"]
    def invalid_emails, do: ["rms[at]ai.mit.edu"]

    # Not referenced by any test in this module.
    def valid_custom_tld_emails, do: ["hi@company.null"]
  end