commit: 650f044102ba8c62bc7c7eaf3ff033aad7463a29
parent 18972a2a8d9a2c95a56ab13e5ee1d3c0dcafb3db
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Sun, 5 Feb 2023 11:48:09 +0100
Atom: Parse entries
Diffstat:
2 files changed, 125 insertions(+), 7 deletions(-)
diff --git a/lib/atom.ex b/lib/atom.ex
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule NewsParseEx.Atom do
+ alias NewsParseEx.Maps
alias NewsParseEx.XML
defp get_feed_title(doc) do
@@ -28,21 +29,79 @@ defmodule NewsParseEx.Atom do
# Looks up the text of the feed-level `<id>` element and returns the result of
# `XML.string_from_xpath/2` unchanged; `parse/1` matches that result against `{:ok, id}`.
defp get_feed_id(doc), do: XML.string_from_xpath(~s[/feed/id/text()], doc)
# Reads the text of the feed-level `<updated>` element and parses it as a timestamp.
# In the added version, a lookup result that is not `{:ok, text}` is returned as-is by `with`
# instead of raising, and parsing goes through `Timex.parse/2` with the `{ISO:Extended}` format;
# `parse/1` matches the outcome against `{:ok, last_update}`.
defp get_feed_last_update(doc) do
- {:ok, updated} = XML.string_from_xpath(~s[/feed/updated/text()], doc)
- DateTime.from_iso8601(updated)
+ with {:ok, updated} <- XML.string_from_xpath(~s[/feed/updated/text()], doc) do
+ Timex.parse(updated, "{ISO:Extended}")
+ end
end
# No description is extracted for Atom feeds: the document is ignored and `{:ok, nil}` is
# always returned.
# NOTE(review): Atom's `<subtitle>` element looks like a candidate source — confirm before use.
defp get_feed_description(_doc), do: {:ok, nil}
- defp get_feed_entries(_doc) do
- # FIXME
- {:ok, []}
+  # Selects every `/feed/entry` node of the document and converts each one with
+  # `get_feed_entry/1`.
+  #
+  # Always returns `{:ok, entries}`; a feed without entries yields `{:ok, []}`.
+  defp get_feed_entries(doc) do
+    # Mapping over an empty node list already produces [], so no separate
+    # emptiness branch (and no O(n) `length/1` call) is needed.
+    entries =
+      '/feed/entry'
+      |> :xmerl_xpath.string(doc)
+      |> Enum.map(&get_feed_entry/1)
+
+    {:ok, entries}
+  end
+
+  # Single-field lookups on an entry fragment. Each one evaluates a fixed XPath through
+  # `XML.string_from_xpath/2` and returns that helper's result unchanged.
+  # NOTE(review): title and summary select the element itself while link/id select an
+  # attribute/text node — presumably the helper stringifies both; confirm against `XML`.
+
+  # `<title>` of the entry.
+  defp get_entry_title(frag) do
+    XML.string_from_xpath("/entry/title", frag)
+  end
+
+  # `<summary>` of the entry, exposed by `get_feed_entry/1` as `:description`.
+  defp get_entry_description(frag) do
+    XML.string_from_xpath("/entry/summary", frag)
+  end
+
+  # `href` attribute of the entry's `<link>`.
+  defp get_entry_link(frag) do
+    XML.string_from_xpath("/entry/link/@href", frag)
+  end
+
+  # Text of the entry's `<id>`.
+  defp get_entry_id(frag) do
+    XML.string_from_xpath("/entry/id/text()", frag)
+  end
+
+  # Parses the entry's `<published>` timestamp with Timex using the `{ISO:Extended}` format.
+  #
+  # Returns the `Timex.parse/2` result when the lookup yields `{:ok, text}`. Any other lookup
+  # result is returned as-is instead of raising `MatchError`, mirroring
+  # `get_feed_last_update/1`; the caller hands the outcome to `Maps.put_if_ok/3`.
+  defp get_entry_published(frag) do
+    with {:ok, published} <- XML.string_from_xpath(~s{/entry/published/text()}, frag) do
+      Timex.parse(published, "{ISO:Extended}")
+    end
+  end
+
+  # Parses the entry's `<updated>` timestamp with Timex using the `{ISO:Extended}` format.
+  #
+  # Returns the `Timex.parse/2` result when the lookup yields `{:ok, text}`. Any other lookup
+  # result is returned as-is instead of raising `MatchError`, mirroring
+  # `get_feed_last_update/1`; the caller hands the outcome to `Maps.put_if_ok/3`.
+  defp get_entry_updated(frag) do
+    with {:ok, updated} <- XML.string_from_xpath(~s{/entry/updated/text()}, frag) do
+      Timex.parse(updated, "{ISO:Extended}")
+    end
+  end
+
+  # Reads the `type` attribute of the entry's `<content>` and dispatches to
+  # `get_entry_content/2` with it. A lookup result that is not `{:ok, type}` is
+  # returned unchanged.
+  defp get_entry_content(frag) do
+    case XML.string_from_xpath(~s[/entry/content/@type], frag) do
+      {:ok, type} -> get_entry_content(frag, type)
+      other -> other
+    end
+  end
+
+  # Extracts the entry's `<content>` according to the value of its `type` attribute.
+  #
+  #   * "html"  -> `{:ok, {:html, nodes}}`, nodes being every child node of `<content>`
+  #   * "xhtml" -> `{:ok, {:xhtml, nodes}}`, nodes being the child elements whose local name is "div"
+  #   * other   -> the text lookup result, except that `{:ok, ""}` becomes `:empty`
+  defp get_entry_content(frag, "html") do
+    {:ok, {:html, :xmerl_xpath.string('/entry/content/child::node()', frag)}}
+  end
+
+  defp get_entry_content(frag, "xhtml") do
+    {:ok, {:xhtml, :xmerl_xpath.string('/entry/content/*[local-name(.)="div"]', frag)}}
+  end
+
+  defp get_entry_content(frag, _type) do
+    case XML.string_from_xpath(~s[/entry/content/text()], frag) do
+      # An empty text body is reported as :empty rather than as an ok-tuple.
+      {:ok, ""} -> :empty
+      # Non-empty text and any non-ok lookup result pass through untouched.
+      other -> other
+    end
+  end
+
+  # Builds the map describing one entry fragment. Every field extractor is run and its
+  # result is offered to `Maps.put_if_ok/3` under the matching key, starting from an
+  # empty map.
+  defp get_feed_entry(frag) do
+    field_results = [
+      title: get_entry_title(frag),
+      id: get_entry_id(frag),
+      link: get_entry_link(frag),
+      description: get_entry_description(frag),
+      published: get_entry_published(frag),
+      updated: get_entry_updated(frag),
+      content: get_entry_content(frag)
+    ]
+
+    Enum.reduce(field_results, %{}, fn {key, result}, entry ->
+      Maps.put_if_ok(entry, key, result)
+    end)
   end
def parse(doc) do
with {_, {:ok, id}} <- {:id, get_feed_id(doc)},
- {_, {:ok, last_update, _tz_offset}} <-
- {:last_update, get_feed_last_update(doc)},
+ {_, {:ok, last_update}} <- {:last_update, get_feed_last_update(doc)},
{_, {:ok, title}} <- {:title, get_feed_title(doc)},
{_, {:ok, description}} <- {:desc, get_feed_description(doc)},
{_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do
diff --git a/test/news_parse_ex_test.exs b/test/news_parse_ex_test.exs
@@ -23,6 +23,7 @@ defmodule NewsParseExTest do
assert(parsed.description == nil)
assert(parsed.id == "https://example.org/feed/")
assert(parsed.last_update == ~U[2021-11-01 16:09:55Z])
+ assert(parsed.entries == [])
end
test "Gitlab atom feed" do
@@ -33,6 +34,24 @@ defmodule NewsParseExTest do
assert(parsed.description == nil)
assert(parsed.id == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags")
assert(parsed.last_update == ~U[2021-11-01T16:09:55Z])
+
+ assert(length(parsed.entries) == 20)
+
+ entry_0 = Enum.at(parsed.entries, 0)
+
+ assert(
+ Map.get(entry_0, :id) == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1"
+ )
+
+ assert(Map.get(entry_0, :description) |> String.split("\n") |> length == 37)
+
+ assert(
+ Map.get(entry_0, :link) == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1"
+ )
+
+ assert(Map.get(entry_0, :updated) == ~U[2022-12-25T15:56:39Z])
+ assert(Map.get(entry_0, :published) == nil)
+ assert(Map.get(entry_0, :title) == "0.16.1")
end
test "RFC4287 brief, single-entry Atom Feed Document" do
@@ -43,6 +62,18 @@ defmodule NewsParseExTest do
assert(parsed.description == nil)
assert(parsed.id == "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6")
assert(parsed.last_update == ~U[2003-12-13T18:30:02Z])
+
+ assert(
+ parsed.entries == [
+ %{
+ id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
+ description: "Some text.",
+ link: "http://example.org/2003/12/13/atom03",
+ updated: ~U[2003-12-13T18:30:02Z],
+ title: "Atom-Powered Robots Run Amok"
+ }
+ ]
+ )
end
test "RFC4287 more extensive, single-entry Atom Feed Document" do
@@ -53,6 +84,34 @@ defmodule NewsParseExTest do
assert(parsed.description == nil)
assert(parsed.id == "tag:example.org,2003:3")
assert(parsed.last_update == ~U[2005-07-31T12:29:29Z])
+
+ # 2003-12-13 08:29:29-04:00
+ published = %DateTime{
+ year: 2003,
+ month: 12,
+ day: 13,
+ hour: 08,
+ minute: 29,
+ second: 29,
+ std_offset: 0,
+ utc_offset: -4 * 60 * 60,
+ time_zone: "Etc/UTC-4",
+ zone_abbr: "-04"
+ }
+
+ assert(
+ [
+ %{
+ title: "Atom draft-07 snapshot",
+ link: "http://example.org/2005/04/02/atom",
+ id: "tag:example.org,2003:3.2397",
+ updated: ~U[2005-07-31 12:29:29Z],
+ published: ^published,
+ content: {:xhtml, _content},
+ description: ""
+ }
+ ] = parsed.entries
+ )
end
end