# news_parse_ex_test.exs (6581B)
# NewsParseEx: RSS/Atom parser
# Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule NewsParseExTest do
  # Tests for NewsParseEx.parse/1 using inline documents and on-disk fixtures,
  # covering Atom feeds, Atom title text constructs, and RSS feeds.
  #
  # The multi-line ~s[...] / ~s{...} literals below are runtime XML input; their
  # lines are intentionally left unindented so the whitespace seen by the parser
  # does not depend on how this file is formatted.
  use ExUnit.Case, async: true

  alias NewsParseEx.XML

  doctest NewsParseEx

  describe "Parses Atom Feed Document" do
    test "basic Atom feed" do
      feed = ~s[<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Test Title</title>
<id>https://example.org/feed/</id>
<updated>2021-11-01T16:09:55Z</updated>
</feed>]

      {:ok, parsed} = NewsParseEx.parse(feed)

      assert parsed.title == "Test Title"
      assert parsed.description == nil
      assert parsed.id == "https://example.org/feed/"
      assert parsed.last_update == ~U[2021-11-01 16:09:55Z]
      assert parsed.entries == []
    end

    test "Gitlab atom feed" do
      feed = File.read!("test/fixtures/gitlab/wlroots-2022-12-27.atom")

      {:ok, parsed} = NewsParseEx.parse(feed)

      assert parsed.title == "wlroots tags"
      assert parsed.description == nil
      assert parsed.id == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags"

      # NOTE(review): this timestamp is identical to the one in the inline
      # "basic Atom feed" test and is earlier than the first entry's update
      # (2022-12-25) asserted below. Confirm it against the fixture's
      # feed-level <updated> element.
      assert parsed.last_update == ~U[2021-11-01T16:09:55Z]
      assert length(parsed.entries) == 20

      entry_0 = Enum.at(parsed.entries, 0)

      assert Map.get(entry_0, :id) ==
               "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1"

      description_lines =
        entry_0
        |> Map.get(:description)
        |> String.split("\n")
        |> length()

      assert description_lines == 37

      assert Map.get(entry_0, :link) ==
               "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1"

      assert Map.get(entry_0, :updated) == ~U[2022-12-25T15:56:39Z]

      # Map.get/2 (rather than entry_0.published) so that an absent key also
      # satisfies the assertion.
      assert Map.get(entry_0, :published) == nil
      assert Map.get(entry_0, :title) == "0.16.1"
    end

    test "RFC4287 brief, single-entry Atom Feed Document" do
      feed = File.read!("test/fixtures/rfc4287/brief_single_entry.atom")

      {:ok, parsed} = NewsParseEx.parse(feed)

      assert parsed.title == "Example Feed"
      assert parsed.description == nil
      assert parsed.id == "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6"
      assert parsed.last_update == ~U[2003-12-13T18:30:02Z]

      assert parsed.entries == [
               %{
                 id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
                 description: "Some text.",
                 link: "http://example.org/2003/12/13/atom03",
                 updated: ~U[2003-12-13T18:30:02Z],
                 title: "Atom-Powered Robots Run Amok"
               }
             ]
    end

    test "RFC4287 more extensive, single-entry Atom Feed Document" do
      feed = File.read!("test/fixtures/rfc4287/extensive_single_entry.atom")

      {:ok, parsed} = NewsParseEx.parse(feed)

      assert parsed.title == "dive into mark"
      assert parsed.description == nil
      assert parsed.id == "tag:example.org,2003:3"
      assert parsed.last_update == ~U[2005-07-31T12:29:29Z]

      # 2003-12-13 08:29:29-04:00
      published = %DateTime{
        year: 2003,
        month: 12,
        day: 13,
        hour: 8,
        minute: 29,
        second: 29,
        std_offset: 0,
        utc_offset: -4 * 60 * 60,
        time_zone: "Etc/UTC-4",
        zone_abbr: "-04"
      }

      # Pattern match instead of ==: the XHTML content payload is only checked
      # for its {:xhtml, _} tag, not for its exact value.
      assert [
               %{
                 title: "Atom draft-07 snapshot",
                 link: "http://example.org/2005/04/02/atom",
                 id: "tag:example.org,2003:3.2397",
                 updated: ~U[2005-07-31 12:29:29Z],
                 published: ^published,
                 content: {:xhtml, _content},
                 description: ""
               }
             ] = parsed.entries
    end
  end

  # The title inputs below are the Text construct examples from RFC 4287
  # section 3.1.1. A literal "<" is not allowed in XML character data, so it is
  # written as &lt; (and double-escaped for type="html").
  describe "Parses Atom title" do
    setup do
      # {{title}} is a placeholder substituted by each test with its <title>.
      %{feed: ~s[<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
{{title}}
<id>https://example.org/feed/</id>
<updated>2021-11-01T16:09:55Z</updated>
</feed>]}
    end

    test "plain text", %{feed: feed} do
      {:ok, parsed} =
        feed
        |> String.replace("{{title}}", ~s{<title type="text">Less: &lt;</title>})
        |> NewsParseEx.parse()

      assert parsed.title == "Less: <"
    end

    test "HTML", %{feed: feed} do
      {:ok, parsed} =
        feed
        |> String.replace(
          "{{title}}",
          ~s{<title type="html">Less: &lt;em> &amp;lt; &lt;/em></title>}
        )
        |> NewsParseEx.parse()

      # XML parsing removes one level of escaping, leaving HTML markup whose
      # own "<" character is still escaped as &lt;.
      assert parsed.title ==
               {:html,
                [{:xmlText, [title: 2, feed: 1], 1, [], ~c"Less: <em> &lt; </em>", :text}]}
    end

    test "XHTML xhtml:div", %{feed: feed} do
      title_xml = ~s{<title type="xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml">
<xhtml:div>
Less: <xhtml:em> &lt; </xhtml:em>
</xhtml:div>
</title>}

      {:ok, parsed} =
        feed
        |> String.replace("{{title}}", title_xml)
        |> NewsParseEx.parse()

      assert {:xhtml, [title_doc]} = parsed.title
      assert XML.string_from_doc(title_doc) == "Less: \n <"
    end

    test "XHTML div", %{feed: feed} do
      title_xml = ~s{<title type="xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
Less: <em> &lt; </em>
</div>
</title>}

      {:ok, parsed} =
        feed
        |> String.replace("{{title}}", title_xml)
        |> NewsParseEx.parse()

      assert {:xhtml, [title_doc]} = parsed.title
      assert XML.string_from_doc(title_doc) == "Less: \n <"
    end
  end

  describe "Parses RSS Document" do
    test "git.sr.ht tags" do
      feed = File.read!("test/fixtures/git.sr.ht/pkgconf-2022-12-27.rss")

      {:ok, parsed} = NewsParseEx.parse(feed)

      assert parsed.title == "~kaniini/pkgconf refs"
      assert parsed.description == "Git refs for ~kaniini/pkgconf"
      assert parsed.id == "https://git.sr.ht/~kaniini/pkgconf/refs"
      assert parsed.last_update == nil
      assert length(parsed.entries) == 20

      # Sat, 30 May 2020 19:17:12 -0600
      # #DateTime<2020-05-30 19:17:12-06:00 -06 Etc/UTC-6>
      published = %DateTime{
        year: 2020,
        month: 5,
        day: 30,
        hour: 19,
        minute: 17,
        second: 12,
        std_offset: 0,
        utc_offset: -6 * 60 * 60,
        time_zone: "Etc/UTC-6",
        zone_abbr: "-06"
      }

      assert Enum.at(parsed.entries, 0) == %{
               id: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3",
               title: "pkgconf-1.7.3",
               description: "pkgconf 1.7.3.",
               link: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3",
               published: published
             }
    end
  end
end