commit: 52f843eb5519e59c8a637bb82a7568d29c526f75
parent e84b5e9b5596175e9f2c0a09e1cc1ab8f323a87e
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Tue, 27 Dec 2022 14:49:50 +0100
Add initial support for Atom type attribute
Diffstat:
3 files changed, 141 insertions(+), 38 deletions(-)
diff --git a/lib/news_parse_ex.ex b/lib/news_parse_ex.ex
@@ -34,9 +34,28 @@ defmodule NewsParseEx do
{:error, "XML root isn't <feed> but <#{root_name}>"}
end
- defp get_feed_title(doc, :atom), do: XML.string_from_xpath(~s[/feed/title/text()], doc)
defp get_feed_title(doc, :rss2_0), do: XML.string_from_xpath(~s[/rss/channel/title/text()], doc)
+ # Resolves the Atom feed title by first reading the `type` attribute of
+ # `/feed/title` and then delegating to the type-specific extraction.
+ # Any result of the attribute lookup that is not `{:ok, type}` is returned
+ # to the caller unchanged.
+ defp get_feed_title(doc, :atom) do
+   case XML.string_from_xpath(~s[/feed/title/@type], doc) do
+     {:ok, type} -> get_feed_title(doc, :atom, type)
+     other -> other
+   end
+ end
+
+ # Extracts the Atom feed title according to its declared `type`:
+ #   * "html"  -> `{:ok, {:html, nodes}}` with every child node of `/feed/title`
+ #   * "xhtml" -> `{:ok, {:xhtml, nodes}}` with the child element(s) whose
+ #                local name is `div`
+ #   * other   -> whatever `XML.string_from_xpath/2` returns for the title text
+ defp get_feed_title(doc, :atom, type) do
+   case type do
+     "html" ->
+       nodes = :xmerl_xpath.string(~c"/feed/title/child::node()", doc)
+       {:ok, {:html, nodes}}
+
+     "xhtml" ->
+       # local-name() compares the element name without its namespace prefix.
+       nodes = :xmerl_xpath.string(~c{/feed/title/*[local-name(.)="div"]}, doc)
+       {:ok, {:xhtml, nodes}}
+
+     _other ->
+       XML.string_from_xpath(~s[/feed/title/text()], doc)
+   end
+ end
+
defp get_feed_id(doc, :atom), do: XML.string_from_xpath(~s[/feed/id/text()], doc)
defp get_feed_id(doc, :rss2_0), do: XML.string_from_xpath(~s[/rss/channel/link/text()], doc)
diff --git a/lib/xml.ex b/lib/xml.ex
@@ -19,6 +19,16 @@ defmodule NewsParseEx.XML do
{:ok, res}
end
+ @doc """
+ Returns the text found under `doc` as a single trimmed string.
+
+ Every node matched by the XPath `//text()` is converted with `string(.)`,
+ the resulting pieces are concatenated without a separator, and leading and
+ trailing whitespace is stripped from the final string.
+ """
+ def string_from_doc(doc) do
+   text_nodes = :xmerl_xpath.string(~c"//text()", doc)
+
+   text_nodes
+   |> Enum.map_join(fn text_node ->
+     # `string(.)` yields an xmerl `xmlObj` record wrapping the node's string value.
+     {:xmlObj, :string, value} = :xmerl_xpath.string(~c"string(.)", text_node)
+     value
+   end)
+   |> String.trim()
+ end
+
def parse_document(text) do
try do
{doc, _rest} =
diff --git a/test/news_parse_ex_test.exs b/test/news_parse_ex_test.exs
@@ -1,59 +1,133 @@
defmodule NewsParseExTest do
use ExUnit.Case
+
+ alias NewsParseEx.XML
+
doctest NewsParseEx
- test "parses basic Atom feed" do
- feed = ~s[<?xml version="1.0" encoding="utf-8"?>
+ describe "Parses Atom Feed Document" do
+ test "basic Atom feed" do
+ feed = ~s[<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Test Title</title>
<id>https://example.org/feed/</id>
<updated>2021-11-01T16:09:55Z</updated>
</feed>]
- {:ok, parsed} = NewsParseEx.parse(feed)
- assert(parsed.title == "Test Title")
- assert(parsed.description == nil)
- assert(parsed.id == "https://example.org/feed/")
- assert(parsed.last_update == ~U[2021-11-01 16:09:55Z])
- end
+ {:ok, parsed} = NewsParseEx.parse(feed)
+ assert(parsed.title == "Test Title")
+ assert(parsed.description == nil)
+ assert(parsed.id == "https://example.org/feed/")
+ assert(parsed.last_update == ~U[2021-11-01 16:09:55Z])
+ end
- test "parses Gitlab atom feed" do
- feed = File.read!("test/fixtures/gitlab/wlroots-2022-12-27.atom")
+ test "Gitlab atom feed" do
+ feed = File.read!("test/fixtures/gitlab/wlroots-2022-12-27.atom")
- {:ok, parsed} = NewsParseEx.parse(feed)
- assert(parsed.title == "wlroots tags")
- assert(parsed.description == nil)
- assert(parsed.id == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags")
- assert(parsed.last_update == ~U[2021-11-01T16:09:55Z])
- end
+ {:ok, parsed} = NewsParseEx.parse(feed)
+ assert(parsed.title == "wlroots tags")
+ assert(parsed.description == nil)
+ assert(parsed.id == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags")
+ assert(parsed.last_update == ~U[2021-11-01T16:09:55Z])
+ end
+
+ test "RFC4287 brief, single-entry Atom Feed Document" do
+ feed = File.read!("test/fixtures/rfc4287/brief_single_entry.atom")
- test "parses RFC4287 brief, single-entry Atom Feed Document" do
- feed = File.read!("test/fixtures/rfc4287/brief_single_entry.atom")
+ {:ok, parsed} = NewsParseEx.parse(feed)
+ assert(parsed.title == "Example Feed")
+ assert(parsed.description == nil)
+ assert(parsed.id == "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6")
+ assert(parsed.last_update == ~U[2003-12-13T18:30:02Z])
+ end
- {:ok, parsed} = NewsParseEx.parse(feed)
- assert(parsed.title == "Example Feed")
- assert(parsed.description == nil)
- assert(parsed.id == "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6")
- assert(parsed.last_update == ~U[2003-12-13T18:30:02Z])
+ test "RFC4287 more extensive, single-entry Atom Feed Document" do
+ feed = File.read!("test/fixtures/rfc4287/extensive_single_entry.atom")
+
+ {:ok, parsed} = NewsParseEx.parse(feed)
+ assert(parsed.title == "dive into mark")
+ assert(parsed.description == nil)
+ assert(parsed.id == "tag:example.org,2003:3")
+ assert(parsed.last_update == ~U[2005-07-31T12:29:29Z])
+ end
end
- test "parses RFC4287 more extensive, single-entry Atom Feed Document" do
- feed = File.read!("test/fixtures/rfc4287/extensive_single_entry.atom")
+ describe "Parses Atom title" do
+ setup do
+   # Minimal Atom feed; each test swaps the `{{title}}` placeholder for the
+   # `<title>` element it wants to exercise.
+   template = ~s[<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+ {{title}}
+ <id>https://example.org/feed/</id>
+ <updated>2021-11-01T16:09:55Z</updated>
+</feed>]
+
+   %{feed: template}
+ end
+
+ test "plain text", %{feed: feed} do
+   # A literal "<" in character data must be written as `&lt;`; a bare "<"
+   # makes the fixture malformed XML, so it could never parse to `{:ok, _}`.
+   {:ok, parsed} =
+     feed
+     |> String.replace("{{title}}", ~s{<title type="text">Less: &lt;</title>})
+     |> NewsParseEx.parse()
+
+   assert(parsed.title == "Less: <")
+ end
+
+ test "HTML", %{feed: feed} do
+   # A `type="html"` title carries *escaped* markup, so the element has a
+   # single text child whose value is the HTML source. A real `<em>` element
+   # in the fixture would instead produce several child nodes and could not
+   # match the single text node asserted below.
+   {:ok, parsed} =
+     feed
+     |> String.replace(
+       "{{title}}",
+       ~s{<title type="html">Less: &lt;em> &amp;lt; &lt;/em></title>}
+     )
+     |> NewsParseEx.parse()
+
+   assert(
+     parsed.title ==
+       {:html, [{:xmlText, [title: 2, feed: 1], 1, [], 'Less: <em> < </em>', :text}]}
+   )
+ end
+
+ test "XHTML xhtml:div", %{feed: feed} do
+   # The "<" inside `xhtml:em` is escaped as `&lt;`: a bare "<" in character
+   # data is not well-formed XML. The remaining fixture lines are left exactly
+   # as they were because their whitespace ends up in the extracted text.
+   title = ~s{<title type="xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml">
+ <xhtml:div>
+ Less: <xhtml:em> &lt; </xhtml:em>
+ </xhtml:div>
+</title>}
+
+   {:ok, parsed} =
+     feed
+     |> String.replace("{{title}}", title)
+     |> NewsParseEx.parse()
+
+   assert({:xhtml, [title]} = parsed.title)
+   assert(XML.string_from_doc(title) == "Less: \n <")
+ end
+
+ test "XHTML div", %{feed: feed} do
+   # The "<" inside `em` is escaped as `&lt;`: a bare "<" in character data
+   # is not well-formed XML. The remaining fixture lines are left exactly as
+   # they were because their whitespace ends up in the extracted text.
+   title = ~s{<title type="xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml">
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ Less: <em> &lt; </em>
+ </div>
+</title>}
+
+   {:ok, parsed} =
+     feed
+     |> String.replace("{{title}}", title)
+     |> NewsParseEx.parse()
- {:ok, parsed} = NewsParseEx.parse(feed)
- assert(parsed.title == "dive into mark")
- assert(parsed.description == nil)
- assert(parsed.id == "tag:example.org,2003:3")
- assert(parsed.last_update == ~U[2005-07-31T12:29:29Z])
+   assert({:xhtml, [title]} = parsed.title)
+   assert(XML.string_from_doc(title) == "Less: \n <")
+ end
end
- test "parses git.sr.ht RSS feed" do
- feed = File.read!("test/fixtures/git.sr.ht/pkgconf-2022-12-27.rss")
+ describe "Parses RSS Document" do
+ test "git.sr.ht tags" do
+ feed = File.read!("test/fixtures/git.sr.ht/pkgconf-2022-12-27.rss")
- {:ok, parsed} = NewsParseEx.parse(feed)
- assert(parsed.title == "~kaniini/pkgconf refs")
- assert(parsed.description == "Git refs for ~kaniini/pkgconf")
- assert(parsed.id == "https://git.sr.ht/~kaniini/pkgconf/refs")
- assert(parsed.last_update == nil)
+ {:ok, parsed} = NewsParseEx.parse(feed)
+ assert(parsed.title == "~kaniini/pkgconf refs")
+ assert(parsed.description == "Git refs for ~kaniini/pkgconf")
+ assert(parsed.id == "https://git.sr.ht/~kaniini/pkgconf/refs")
+ assert(parsed.last_update == nil)
+ end
end
end