logo

news_parse_ex

rss2_0.ex (2600B)


  1. # NewsParseEx: RSS/Atom parser
  2. # Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
  3. # SPDX-License-Identifier: AGPL-3.0-only
  4. defmodule NewsParseEx.RSS2_0 do
  5. alias NewsParseEx.Maps
  6. alias NewsParseEx.XML
  7. defp get_feed_title(doc), do: XML.string_from_xpath(~s[/rss/channel/title/text()], doc)
  8. defp get_feed_id(doc), do: XML.string_from_xpath(~s[/rss/channel/link/text()], doc)
  9. defp get_feed_last_update(_doc), do: {:ok, nil, nil}
  10. defp get_feed_description(doc),
  11. do: XML.string_from_xpath(~s[/rss/channel/description/text()], doc)
  12. defp get_feed_entries(doc) do
  13. items = :xmerl_xpath.string('/rss/channel/item', doc)
  14. if length(items) != 0 do
  15. entries = Enum.map(items, &get_feed_entry(&1))
  16. {:ok, entries}
  17. else
  18. {:ok, []}
  19. end
  20. end
  21. defp get_entry_title(frag), do: XML.string_from_xpath(~s{/item/title}, frag)
  22. defp get_entry_description(frag), do: XML.string_from_xpath(~s{/item/description}, frag)
  23. defp get_entry_link(frag), do: XML.string_from_xpath(~s{/item/link}, frag)
  24. defp get_entry_id(frag), do: XML.string_from_xpath(~s{/item/guid}, frag)
  25. defp parse_datetime(_str, []) do
  26. :error
  27. end
  28. defp parse_datetime(str, [format | rest]) when is_bitstring(str) do
  29. with {:ok, _} = dt <- Timex.parse(str, format) do
  30. dt
  31. else
  32. _e -> parse_datetime(str, rest)
  33. end
  34. end
  35. defp get_entry_published(frag) do
  36. with {:ok, pubDate} <- XML.string_from_xpath(~s{/item/pubDate}, frag) do
  37. # RFC1123 is because of SourceHut
  38. parse_datetime(pubDate, ["{RFC822}", "{RFC1123}"])
  39. end
  40. end
  41. defp get_feed_entry(frag) do
  42. # FIXME: Don't just shove errors away
  43. %{}
  44. |> Maps.put_if_ok(:title, get_entry_title(frag))
  45. |> Maps.put_if_ok(:id, get_entry_id(frag))
  46. |> Maps.put_if_ok(:link, get_entry_link(frag))
  47. |> Maps.put_if_ok(:published, get_entry_published(frag))
  48. |> Maps.put_if_ok(:description, get_entry_description(frag))
  49. end
  50. def parse(doc) do
  51. with {_, {:ok, id}} <- {:id, get_feed_id(doc)},
  52. {_, {:ok, last_update, _tz_offset}} <-
  53. {:last_update, get_feed_last_update(doc)},
  54. {_, {:ok, title}} <- {:title, get_feed_title(doc)},
  55. {_, {:ok, description}} <- {:desc, get_feed_description(doc)},
  56. {_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do
  57. data = %{
  58. :parser => NewsParseEx.RSS2_0,
  59. :title => title,
  60. :description => description,
  61. :id => id,
  62. :last_update => last_update,
  63. :entries => entries
  64. }
  65. {:ok, data}
  66. end
  67. end
  68. end