rss2_0.ex (2600B)
- # NewsParseEx: RSS/Atom parser
- # Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
- # SPDX-License-Identifier: AGPL-3.0-only
defmodule NewsParseEx.RSS2_0 do
  @moduledoc """
  Parser for RSS 2.0 feeds.

  `parse/1` reads the channel-level fields and every `/rss/channel/item` of an
  already-parsed document and returns them as a plain map.
  """

  alias NewsParseEx.Maps
  alias NewsParseEx.XML

  # Timex format strings tried, in order, on an item's `pubDate`.
  # RFC1123 is because of SourceHut
  @pub_date_formats ["{RFC822}", "{RFC1123}"]

  @doc """
  Extracts the feed metadata and entries from `doc`.

  `doc` is handed to `:xmerl_xpath.string/2` and
  `NewsParseEx.XML.string_from_xpath/2`, so it is presumably an xmerl document
  (TODO confirm against the caller).

  Returns `{:ok, feed}`, where `feed` is a map with the keys:

    * `:parser` - this module
    * `:title` - text of `/rss/channel/title`
    * `:description` - text of `/rss/channel/description`
    * `:id` - text of `/rss/channel/link`
    * `:last_update` - currently always `nil`
    * `:entries` - one map per `/rss/channel/item`, in document order; each may
      hold `:title`, `:id`, `:link`, `:published` and `:description`

  When a step yields something other than its expected `{:ok, ...}` tuple, that
  result is returned tagged with the step name: `{:id, result}`,
  `{:title, result}` or `{:desc, result}`. The `:last_update` and `:entries`
  steps use the same tagging but always succeed in the current implementation.
  """
  @spec parse(term()) ::
          {:ok, map()} | {:id | :last_update | :title | :desc | :entries, term()}
  def parse(doc) do
    with {_, {:ok, id}} <- {:id, get_feed_id(doc)},
         {_, {:ok, last_update, _tz_offset}} <-
           {:last_update, get_feed_last_update(doc)},
         {_, {:ok, title}} <- {:title, get_feed_title(doc)},
         {_, {:ok, description}} <- {:desc, get_feed_description(doc)},
         {_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do
      feed = %{
        parser: __MODULE__,
        title: title,
        description: description,
        id: id,
        last_update: last_update,
        entries: entries
      }

      {:ok, feed}
    end
  end

  # Channel-level accessors. Each one returns whatever
  # `XML.string_from_xpath/2` returns for the given path.
  defp get_feed_title(doc), do: XML.string_from_xpath("/rss/channel/title/text()", doc)

  # The channel's <link> doubles as the feed identifier.
  defp get_feed_id(doc), do: XML.string_from_xpath("/rss/channel/link/text()", doc)

  defp get_feed_description(doc),
    do: XML.string_from_xpath("/rss/channel/description/text()", doc)

  # Stub: no last-update information is extracted; the 3-tuple shape
  # ({:ok, datetime, tz_offset}) is what `parse/1` matches on.
  defp get_feed_last_update(_doc), do: {:ok, nil, nil}

  # Maps every <item> under the channel to an entry map. A feed without items
  # yields `{:ok, []}` because mapping over an empty list is a no-op.
  defp get_feed_entries(doc) do
    entries =
      ~c"/rss/channel/item"
      |> :xmerl_xpath.string(doc)
      |> Enum.map(&get_feed_entry/1)

    {:ok, entries}
  end

  # Builds one entry map from an <item> fragment.
  defp get_feed_entry(frag) do
    # FIXME: Don't just shove errors away
    # NOTE(review): `Maps.put_if_ok/3` presumably sets the key only for
    # `{:ok, value}` results and drops everything else -- confirm in
    # NewsParseEx.Maps.
    %{}
    |> Maps.put_if_ok(:title, get_entry_title(frag))
    |> Maps.put_if_ok(:id, get_entry_id(frag))
    |> Maps.put_if_ok(:link, get_entry_link(frag))
    |> Maps.put_if_ok(:published, get_entry_published(frag))
    |> Maps.put_if_ok(:description, get_entry_description(frag))
  end

  # Item-level accessors; paths are evaluated against the <item> fragment.
  defp get_entry_title(frag), do: XML.string_from_xpath("/item/title", frag)
  defp get_entry_description(frag), do: XML.string_from_xpath("/item/description", frag)
  defp get_entry_link(frag), do: XML.string_from_xpath("/item/link", frag)
  defp get_entry_id(frag), do: XML.string_from_xpath("/item/guid", frag)

  # Reads <pubDate> and parses it with the formats in @pub_date_formats.
  # Returns `{:ok, datetime}` on success, `:error` when no format matches, or
  # the non-`{:ok, _}` result of the XPath lookup unchanged.
  defp get_entry_published(frag) do
    with {:ok, pub_date} <- XML.string_from_xpath("/item/pubDate", frag) do
      parse_datetime(pub_date, @pub_date_formats)
    end
  end

  # Tries each format in turn and returns the first `{:ok, datetime}` produced
  # by `Timex.parse/2`, or `:error` once the format list is exhausted.
  defp parse_datetime(_str, []), do: :error

  defp parse_datetime(str, [format | rest]) when is_binary(str) do
    case Timex.parse(str, format) do
      {:ok, _datetime} = parsed -> parsed
      _no_match -> parse_datetime(str, rest)
    end
  end
end