logo

news_parse_ex

news_parse_ex_test.exs (6581B)


  1. # NewsParseEx: RSS/Atom parser
  2. # Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
  3. # SPDX-License-Identifier: AGPL-3.0-only
  4. defmodule NewsParseExTest do
  5. use ExUnit.Case, async: true
  6. alias NewsParseEx.XML
  7. doctest NewsParseEx
  8. describe "Parses Atom Feed Document" do
  9. test "basic Atom feed" do
  10. feed = ~s[<?xml version="1.0" encoding="utf-8"?>
  11. <feed xmlns="http://www.w3.org/2005/Atom">
  12. <title>Test Title</title>
  13. <id>https://example.org/feed/</id>
  14. <updated>2021-11-01T16:09:55Z</updated>
  15. </feed>]
  16. {:ok, parsed} = NewsParseEx.parse(feed)
  17. assert(parsed.title == "Test Title")
  18. assert(parsed.description == nil)
  19. assert(parsed.id == "https://example.org/feed/")
  20. assert(parsed.last_update == ~U[2021-11-01 16:09:55Z])
  21. assert(parsed.entries == [])
  22. end
  23. test "Gitlab atom feed" do
  24. feed = File.read!("test/fixtures/gitlab/wlroots-2022-12-27.atom")
  25. {:ok, parsed} = NewsParseEx.parse(feed)
  26. assert(parsed.title == "wlroots tags")
  27. assert(parsed.description == nil)
  28. assert(parsed.id == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags")
  29. assert(parsed.last_update == ~U[2021-11-01T16:09:55Z])
  30. assert(length(parsed.entries) == 20)
  31. entry_0 = Enum.at(parsed.entries, 0)
  32. assert(
  33. Map.get(entry_0, :id) == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1"
  34. )
  35. assert(Map.get(entry_0, :description) |> String.split("\n") |> length == 37)
  36. assert(
  37. Map.get(entry_0, :link) == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1"
  38. )
  39. assert(Map.get(entry_0, :updated) == ~U[2022-12-25T15:56:39Z])
  40. assert(Map.get(entry_0, :published) == nil)
  41. assert(Map.get(entry_0, :title) == "0.16.1")
  42. end
  43. test "RFC4287 brief, single-entry Atom Feed Document" do
  44. feed = File.read!("test/fixtures/rfc4287/brief_single_entry.atom")
  45. {:ok, parsed} = NewsParseEx.parse(feed)
  46. assert(parsed.title == "Example Feed")
  47. assert(parsed.description == nil)
  48. assert(parsed.id == "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6")
  49. assert(parsed.last_update == ~U[2003-12-13T18:30:02Z])
  50. assert(
  51. parsed.entries == [
  52. %{
  53. id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
  54. description: "Some text.",
  55. link: "http://example.org/2003/12/13/atom03",
  56. updated: ~U[2003-12-13T18:30:02Z],
  57. title: "Atom-Powered Robots Run Amok"
  58. }
  59. ]
  60. )
  61. end
  62. test "RFC4287 more extensive, single-entry Atom Feed Document" do
  63. feed = File.read!("test/fixtures/rfc4287/extensive_single_entry.atom")
  64. {:ok, parsed} = NewsParseEx.parse(feed)
  65. assert(parsed.title == "dive into mark")
  66. assert(parsed.description == nil)
  67. assert(parsed.id == "tag:example.org,2003:3")
  68. assert(parsed.last_update == ~U[2005-07-31T12:29:29Z])
  69. # 2003-12-13 08:29:29-04:00
  70. published = %DateTime{
  71. year: 2003,
  72. month: 12,
  73. day: 13,
  74. hour: 08,
  75. minute: 29,
  76. second: 29,
  77. std_offset: 0,
  78. utc_offset: -4 * 60 * 60,
  79. time_zone: "Etc/UTC-4",
  80. zone_abbr: "-04"
  81. }
  82. assert(
  83. [
  84. %{
  85. title: "Atom draft-07 snapshot",
  86. link: "http://example.org/2005/04/02/atom",
  87. id: "tag:example.org,2003:3.2397",
  88. updated: ~U[2005-07-31 12:29:29Z],
  89. published: ^published,
  90. content: {:xhtml, _content},
  91. description: ""
  92. }
  93. ] = parsed.entries
  94. )
  95. end
  96. end
  97. describe "Parses Atom title" do
  98. setup do
  99. %{feed: ~s[<?xml version="1.0" encoding="utf-8"?>
  100. <feed xmlns="http://www.w3.org/2005/Atom">
  101. {{title}}
  102. <id>https://example.org/feed/</id>
  103. <updated>2021-11-01T16:09:55Z</updated>
  104. </feed>]}
  105. end
  106. test "plain text", %{feed: feed} do
  107. {:ok, parsed} =
  108. feed
  109. |> String.replace("{{title}}", ~s{<title type="text">Less: &lt;</title>})
  110. |> NewsParseEx.parse()
  111. assert(parsed.title == "Less: <")
  112. end
  113. test "HTML", %{feed: feed} do
  114. {:ok, parsed} =
  115. feed
  116. |> String.replace(
  117. "{{title}}",
  118. ~s{<title type="html">Less: &lt;em> &amp;lt; &lt;/em></title>}
  119. )
  120. |> NewsParseEx.parse()
  121. assert(
  122. parsed.title ==
  123. {:html, [{:xmlText, [title: 2, feed: 1], 1, [], 'Less: <em> &lt; </em>', :text}]}
  124. )
  125. end
  126. test "XHTML xhtml:div", %{feed: feed} do
  127. title = ~s{<title type="xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml">
  128. <xhtml:div>
  129. Less: <xhtml:em> &lt; </xhtml:em>
  130. </xhtml:div>
  131. </title>}
  132. {:ok, parsed} =
  133. feed
  134. |> String.replace("{{title}}", title)
  135. |> NewsParseEx.parse()
  136. assert({:xhtml, [title]} = parsed.title)
  137. assert(XML.string_from_doc(title) == "Less: \n <")
  138. end
  139. test "XHTML div", %{feed: feed} do
  140. title = ~s{<title type="xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml">
  141. <div xmlns="http://www.w3.org/1999/xhtml">
  142. Less: <em> &lt; </em>
  143. </div>
  144. </title>}
  145. {:ok, parsed} =
  146. feed
  147. |> String.replace("{{title}}", title)
  148. |> NewsParseEx.parse()
  149. assert({:xhtml, [title]} = parsed.title)
  150. assert(XML.string_from_doc(title) == "Less: \n <")
  151. end
  152. end
  153. describe "Parses RSS Document" do
  154. test "git.sr.ht tags" do
  155. feed = File.read!("test/fixtures/git.sr.ht/pkgconf-2022-12-27.rss")
  156. {:ok, parsed} = NewsParseEx.parse(feed)
  157. assert(parsed.title == "~kaniini/pkgconf refs")
  158. assert(parsed.description == "Git refs for ~kaniini/pkgconf")
  159. assert(parsed.id == "https://git.sr.ht/~kaniini/pkgconf/refs")
  160. assert(parsed.last_update == nil)
  161. assert(length(parsed.entries) == 20)
  162. # Sat, 30 May 2020 19:17:12 -0600
  163. # #DateTime<2020-05-30 19:17:12-06:00 -06 Etc/UTC-6>
  164. published = %DateTime{
  165. year: 2020,
  166. month: 05,
  167. day: 30,
  168. hour: 19,
  169. minute: 17,
  170. second: 12,
  171. std_offset: 0,
  172. utc_offset: -6 * 60 * 60,
  173. time_zone: "Etc/UTC-6",
  174. zone_abbr: "-06"
  175. }
  176. assert(
  177. Enum.at(parsed.entries, 0) == %{
  178. id: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3",
  179. title: "pkgconf-1.7.3",
  180. description: "pkgconf 1.7.3.",
  181. link: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3",
  182. published: published
  183. }
  184. )
  185. end
  186. end
  187. end