logo

news_parse_ex

commit: 96f57d300aefb7dec1edc982e423634dc0b67fbf
parent 52f843eb5519e59c8a637bb82a7568d29c526f75
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Sat,  4 Feb 2023 11:19:44 +0100

Parse RSS 2.0 entries

Diffstat:

A lib/maps.ex                 | 11 +++++++++++
M lib/news_parse_ex.ex        | 49 +++++++++++++++++++++++++++++++++++++++++++++----
M mix.exs                     |  2 +-
M test/news_parse_ex_test.exs | 14 +++++++++++++-
4 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/lib/maps.ex b/lib/maps.ex @@ -0,0 +1,11 @@ +# NewsParseEx: RSS/Atom parser +# Copyright © 2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule NewsParseEx.Maps do + def put_if_ok(map, key, {:ok, value}) when is_map(map) and key != nil do + Map.put(map, key, value) + end + + def put_if_ok(map, _key, _n), do: map +end diff --git a/lib/news_parse_ex.ex b/lib/news_parse_ex.ex @@ -1,11 +1,12 @@ # NewsParseEx: RSS/Atom parser -# Copyright © 2022 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me> +# Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me> # SPDX-License-Identifier: AGPL-3.0-only defmodule NewsParseEx do alias NewsParseEx.XML + alias NewsParseEx.Maps - def get_feed_type(doc) do + defp get_feed_type(doc) do with {:ok, root_name} <- XML.string_from_xpath(~s[name()], doc) do get_feed_type(doc, root_name) end @@ -71,6 +72,45 @@ defmodule NewsParseEx do defp get_feed_description(doc, :rss2_0), do: XML.string_from_xpath(~s[/rss/channel/description/text()], doc) + defp get_feed_entries(_doc, :atom) do + # FIXME + {:ok, []} + end + + defp get_feed_entries(doc, :rss2_0) do + items = :xmerl_xpath.string('/rss/channel/item', doc) + + if length(items) != 0 do + entries = Enum.map(items, &get_feed_entry(&1, :rss2_0)) + {:ok, entries} + else + {:ok, []} + end + end + + defp get_entry_title(frag, :rss2_0), do: XML.string_from_xpath(~s{/item/title}, frag) + + defp get_entry_description(frag, :rss2_0), + do: XML.string_from_xpath(~s{/item/description}, frag) + + defp get_entry_link(frag, :rss2_0), do: XML.string_from_xpath(~s{/item/link}, frag) + defp get_entry_id(frag, :rss2_0), do: XML.string_from_xpath(~s{/item/guid}, frag) + + defp get_entry_published(frag, :rss2_0) do + with {:ok, pubDate} <- XML.string_from_xpath(~s{/item/pubDate}, frag) do + Calendar.DateTime.Parse.rfc822_utc(pubDate) + end + end + + defp get_feed_entry(frag, :rss2_0) do + 
%{} + |> Maps.put_if_ok(:title, get_entry_title(frag, :rss2_0)) + |> Maps.put_if_ok(:id, get_entry_id(frag, :rss2_0)) + |> Maps.put_if_ok(:link, get_entry_link(frag, :rss2_0)) + |> Maps.put_if_ok(:published, get_entry_published(frag, :rss2_0)) + |> Maps.put_if_ok(:description, get_entry_description(frag, :rss2_0)) + end + def parse(str) when is_bitstring(str) do with {_, {:ok, doc}} <- {:parse, XML.parse_document(str)}, {_, {:ok, feed_type}} <- {:type, get_feed_type(doc)}, @@ -78,14 +118,15 @@ defmodule NewsParseEx do {_, {:ok, last_update, _tz_offset}} <- {:last_update, get_feed_last_update(doc, feed_type)}, {_, {:ok, title}} <- {:title, get_feed_title(doc, feed_type)}, - {_, {:ok, description}} <- {:desc, get_feed_description(doc, feed_type)} do + {_, {:ok, description}} <- {:desc, get_feed_description(doc, feed_type)}, + {_, {:ok, entries}} <- {:entries, get_feed_entries(doc, feed_type)} do data = %{ :type => feed_type, :title => title, :description => description, :id => id, :last_update => last_update, - :entries => [] + :entries => entries } {:ok, data} diff --git a/mix.exs b/mix.exs @@ -19,6 +19,6 @@ defmodule NewsParseEx.MixProject do end defp deps do - [] + [{:calendar, "~> 1.0.0"}] end end diff --git a/test/news_parse_ex_test.exs b/test/news_parse_ex_test.exs @@ -1,5 +1,5 @@ defmodule NewsParseExTest do - use ExUnit.Case + use ExUnit.Case, async: true alias NewsParseEx.XML @@ -128,6 +128,18 @@ defmodule NewsParseExTest do assert(parsed.description == "Git refs for ~kaniini/pkgconf") assert(parsed.id == "https://git.sr.ht/~kaniini/pkgconf/refs") assert(parsed.last_update == nil) + + assert(length(parsed.entries) == 20) + + assert( + Enum.at(parsed.entries, 0) == %{ + id: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3", + title: "pkgconf-1.7.3", + description: "pkgconf 1.7.3.", + link: "https://git.sr.ht/~kaniini/pkgconf/refs/pkgconf-1.7.3", + published: ~U[2020-05-31 01:17:12Z] + } + ) end end end