commit: 96f57d300aefb7dec1edc982e423634dc0b67fbf
parent 52f843eb5519e59c8a637bb82a7568d29c526f75
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Sat, 4 Feb 2023 11:19:44 +0100
Parse RSS 2.0 entries
Diffstat:
4 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/lib/maps.ex b/lib/maps.ex
@@ -0,0 +1,11 @@
+# NewsParseEx: RSS/Atom parser
+# Copyright © 2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule NewsParseEx.Maps do
+  @moduledoc """
+  Map helpers used while assembling parsed feed data.
+  """
+
+  @doc """
+  Stores `value` under `key` in `map` when the third argument is `{:ok, value}`.
+
+  `map` is returned unchanged when the third argument has any other shape,
+  when `key` is `nil`, or when `map` is not a map.
+  """
+  def put_if_ok(%{} = map, key, {:ok, value}) when not is_nil(key),
+    do: Map.put(map, key, value)
+
+  def put_if_ok(map, _key, _result), do: map
+end
diff --git a/lib/news_parse_ex.ex b/lib/news_parse_ex.ex
@@ -1,11 +1,12 @@
# NewsParseEx: RSS/Atom parser
-# Copyright © 2022 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
+# Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule NewsParseEx do
alias NewsParseEx.XML
+ alias NewsParseEx.Maps
- def get_feed_type(doc) do
+ defp get_feed_type(doc) do
with {:ok, root_name} <- XML.string_from_xpath(~s[name()], doc) do
get_feed_type(doc, root_name)
end
@@ -71,6 +72,45 @@ defmodule NewsParseEx do
defp get_feed_description(doc, :rss2_0),
do: XML.string_from_xpath(~s[/rss/channel/description/text()], doc)
+ # Extracts the entries of a parsed feed document.
+ #
+ # Returns `{:ok, entries}` where `entries` holds one map per feed item
+ # (see `get_feed_entry/2`), or an empty list when there are none.
+ defp get_feed_entries(_doc, :atom) do
+   # FIXME: Atom entries are not parsed yet, so no entries are reported.
+   {:ok, []}
+ end
+
+ defp get_feed_entries(doc, :rss2_0) do
+   # xmerl expects a charlist; `~c"..."` avoids the deprecated single-quote form.
+   items = :xmerl_xpath.string(~c"/rss/channel/item", doc)
+
+   # Mapping over an empty list already yields `[]`, so no emptiness check
+   # (and no O(n) `length/1` call) is needed.
+   {:ok, Enum.map(items, &get_feed_entry(&1, :rss2_0))}
+ end
+
+ # Looks up `/item/title` in an RSS 2.0 item fragment and returns the
+ # result of `XML.string_from_xpath/2` as-is.
+ defp get_entry_title(frag, :rss2_0) do
+   XML.string_from_xpath(~s[/item/title], frag)
+ end
+
+ # Looks up `/item/description` in an RSS 2.0 item fragment and returns the
+ # result of `XML.string_from_xpath/2` as-is.
+ defp get_entry_description(frag, :rss2_0) do
+   XML.string_from_xpath(~s[/item/description], frag)
+ end
+
+ # Looks up `/item/link` in an RSS 2.0 item fragment and returns the
+ # result of `XML.string_from_xpath/2` as-is.
+ defp get_entry_link(frag, :rss2_0) do
+   XML.string_from_xpath(~s[/item/link], frag)
+ end
+ # Looks up `/item/guid` in an RSS 2.0 item fragment and returns the
+ # result of `XML.string_from_xpath/2` as-is; the guid serves as the entry id.
+ defp get_entry_id(frag, :rss2_0) do
+   XML.string_from_xpath(~s[/item/guid], frag)
+ end
+
+ # Looks up `/item/pubDate` in an RSS 2.0 item fragment and hands the string
+ # to `Calendar.DateTime.Parse.rfc822_utc/1`, returning that call's result.
+ #
+ # When the XPath lookup does not yield `{:ok, string}`, the lookup result
+ # itself is returned unchanged.
+ defp get_entry_published(frag, :rss2_0) do
+   case XML.string_from_xpath(~s[/item/pubDate], frag) do
+     {:ok, raw_date} -> Calendar.DateTime.Parse.rfc822_utc(raw_date)
+     lookup_failure -> lookup_failure
+   end
+ end
+
+ # Builds the map describing one RSS 2.0 item fragment.
+ #
+ # A field is only present when its getter returned `{:ok, value}`
+ # (see `Maps.put_if_ok/3`); getters run in the order listed below.
+ defp get_feed_entry(frag, :rss2_0) do
+   getters = [
+     title: &get_entry_title/2,
+     id: &get_entry_id/2,
+     link: &get_entry_link/2,
+     published: &get_entry_published/2,
+     description: &get_entry_description/2
+   ]
+
+   Enum.reduce(getters, %{}, fn {field, getter}, entry ->
+     Maps.put_if_ok(entry, field, getter.(frag, :rss2_0))
+   end)
+ end
+
def parse(str) when is_bitstring(str) do
with {_, {:ok, doc}} <- {:parse, XML.parse_document(str)},
{_, {:ok, feed_type}} <- {:type, get_feed_type(doc)},
@@ -78,14 +118,15 @@ defmodule NewsParseEx do
{_, {:ok, last_update, _tz_offset}} <-
{:last_update, get_feed_last_update(doc, feed_type)},
{_, {:ok, title}} <- {:title, get_feed_title(doc, feed_type)},
- {_, {:ok, description}} <- {:desc, get_feed_description(doc, feed_type)} do
+ {_, {:ok, description}} <- {:desc, get_feed_description(doc, feed_type)},
+ {_, {:ok, entries}} <- {:entries, get_feed_entries(doc, feed_type)} do
data = %{
:type => feed_type,
:title => title,
:description => description,
:id => id,
:last_update => last_update,
- :entries => []
+ :entries => entries
}
{:ok, data}
diff --git a/mix.exs b/mix.exs
@@ -19,6 +19,6 @@ defmodule NewsParseEx.MixProject do
end
defp deps do
- []
+ [{:calendar, "~> 1.0.0"}]
end
end
diff --git a/test/news_parse_ex_test.exs b/test/news_parse_ex_test.exs
@@ -1,5 +1,5 @@
defmodule NewsParseExTest do
- use ExUnit.Case
+ use ExUnit.Case, async: true
alias NewsParseEx.XML
@@ -128,6 +128,18 @@ defmodule NewsParseExTest do
assert(parsed.description == "Git refs for ~kaniini/pkgconf")
assert(parsed.id == "https://git.sr.ht/~kaniini/pkgconf/refs")
assert(parsed.last_update == nil)
+
+ assert(length(parsed.entries) == 20)
+
+ assert(
+ Enum.at(parsed.entries, 0) == %{
+ id: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3",
+ title: "pkgconf-1.7.3",
+ description: "pkgconf 1.7.3.",
+ link: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3",
+ published: ~U[2020-05-31 01:17:12Z]
+ }
+ )
end
end
end