logo

news_parse_ex

news_parse_ex.ex (1286B)


  1. # NewsParseEx: RSS/Atom parser
  2. # Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
  3. # SPDX-License-Identifier: AGPL-3.0-only
  defmodule NewsParseEx do
    @moduledoc """
    Front door of the feed parser: inspects the root element of a document and
    hands the document over to the matching parser module.

    Supported roots:

      * `<feed>` whose default namespace is `http://www.w3.org/2005/Atom`,
        handled by `NewsParseEx.Atom`
      * `<rss>` whose `version` attribute is `2.0`, handled by
        `NewsParseEx.RSS2_0`
    """

    alias NewsParseEx.XML

    # The only namespace accepted on a <feed> root element.
    @atom_namespace "http://www.w3.org/2005/Atom"

    @doc """
    Parses `str` and returns whatever the selected parser module's `parse/1`
    returns for the parsed document.

    Failures are returned tagged with the step that produced them:

      * `{:parse, result}` when `NewsParseEx.XML.parse_document/1` returns
        anything other than `{:ok, doc}`
      * `{:parser, result}` when no parser module could be selected for the
        document
    """
    def parse(str) when is_bitstring(str) do
      case XML.parse_document(str) do
        {:ok, doc} -> run_parser(doc, get_feed_parser(doc))
        failure -> {:parse, failure}
      end
    end

    # Invokes the selected parser module, or tags the selection failure so the
    # caller can tell it apart from a document parsing failure.
    defp run_parser(doc, {:ok, parser}), do: parser.parse(doc)
    defp run_parser(_doc, failure), do: {:parser, failure}

    # Reads the name of the root element and selects the parser from it.
    # Anything other than `{:ok, name}` from the XPath lookup is returned as-is.
    defp get_feed_parser(doc) do
      case XML.string_from_xpath(~s[name()], doc) do
        {:ok, root_name} -> get_feed_parser(doc, root_name)
        failure -> failure
      end
    end

    # <feed> root: accepted only when its default namespace is the Atom one.
    defp get_feed_parser(doc, "feed") do
      case XML.string_from_xpath(~s{/feed/namespace::*[name()='']}, doc) do
        {:ok, @atom_namespace} ->
          {:ok, NewsParseEx.Atom}

        {:ok, namespace} ->
          {:error, "Atom feed with wrong root namespace: #{namespace}"}

        failure ->
          failure
      end
    end

    # <rss> root: only version 2.0 has a parser.
    defp get_feed_parser(doc, "rss") do
      case XML.string_from_xpath(~s{/rss/@version}, doc) do
        {:ok, "2.0"} -> {:ok, NewsParseEx.RSS2_0}
        {:ok, other} -> {:error, "RSS with unknown version: #{other}"}
        failure -> failure
      end
    end

    # Any other root element has no parser.
    defp get_feed_parser(_doc, root_name),
      do: {:error, "XML root isn't <feed> but <#{root_name}>"}
  end