logo

news_parse_ex

atom.ex (3685B)


  1. # NewsParseEx: RSS/Atom parser
  2. # Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
  3. # SPDX-License-Identifier: AGPL-3.0-only
  4. defmodule NewsParseEx.Atom do
  5. alias NewsParseEx.Maps
  6. alias NewsParseEx.XML
  # Feed title lookup. Reads the `type` attribute of /feed/title and hands off
  # to get_feed_title/2. Anything other than {:ok, type} coming back from the
  # attribute lookup is returned to the caller unchanged.
  defp get_feed_title(doc) do
    case XML.string_from_xpath(~s[/feed/title/@type], doc) do
      {:ok, kind} -> get_feed_title(doc, kind)
      other -> other
    end
  end

  # type="html": the title's child nodes are returned as selected by xmerl,
  # tagged :html.
  defp get_feed_title(doc, "html") do
    nodes = :xmerl_xpath.string(~c{/feed/title/child::node()}, doc)
    {:ok, {:html, nodes}}
  end

  # type="xhtml": selects the child element(s) whose local name is "div",
  # tagged :xhtml.
  defp get_feed_title(doc, "xhtml") do
    nodes = :xmerl_xpath.string(~c{/feed/title/*[local-name(.)="div"]}, doc)
    {:ok, {:xhtml, nodes}}
  end

  # Any other type value: fall back to the title's text.
  defp get_feed_title(doc, _type) do
    XML.string_from_xpath(~s[/feed/title/text()], doc)
  end
  # Looks up the text of the feed-level <id> element.
  defp get_feed_id(doc) do
    XML.string_from_xpath("/feed/id/text()", doc)
  end
  # Reads /feed/updated and parses it with Timex using "{ISO:Extended}".
  # A lookup result other than {:ok, text} is passed through untouched.
  defp get_feed_last_update(doc) do
    case XML.string_from_xpath(~s[/feed/updated/text()], doc) do
      {:ok, timestamp} -> Timex.parse(timestamp, "{ISO:Extended}")
      other -> other
    end
  end
  # No description is extracted for Atom feeds here: the document is ignored
  # and the result is always {:ok, nil}.
  defp get_feed_description(_doc) do
    {:ok, nil}
  end
  # Collects every /feed/entry node and converts each one with get_feed_entry/1.
  # Always returns {:ok, list}; the list is empty when the feed has no entries.
  # Enum.map/2 over [] already yields [], so no separate emptiness check
  # (and no O(n) length/1 call) is needed.
  defp get_feed_entries(doc) do
    items = :xmerl_xpath.string(~c{/feed/entry}, doc)
    {:ok, Enum.map(items, &get_feed_entry/1)}
  end
  # Looks up the entry's <title> via the /entry/title path.
  defp get_entry_title(frag) do
    XML.string_from_xpath("/entry/title", frag)
  end
  # The entry's description is taken from its <summary> element.
  defp get_entry_description(frag) do
    XML.string_from_xpath("/entry/summary", frag)
  end
  # Looks up the `href` attribute of the entry's <link>.
  defp get_entry_link(frag) do
    XML.string_from_xpath("/entry/link/@href", frag)
  end
  # Looks up the text of the entry's <id> element.
  defp get_entry_id(frag) do
    XML.string_from_xpath("/entry/id/text()", frag)
  end
  # Parses the entry's <published> timestamp with Timex ("{ISO:Extended}").
  #
  # Uses `with`, like get_feed_last_update/1, so that a lookup result other
  # than {:ok, text} is handed back to the caller instead of raising
  # MatchError and aborting the whole feed parse. <published> is an optional
  # element in Atom entries.
  # NOTE(review): the result is consumed by Maps.put_if_ok/3 in
  # get_feed_entry/1, which presumably skips non-ok values — confirm.
  defp get_entry_published(frag) do
    with {:ok, published} <- XML.string_from_xpath(~s{/entry/published/text()}, frag) do
      Timex.parse(published, "{ISO:Extended}")
    end
  end
  # Parses the entry's <updated> timestamp with Timex ("{ISO:Extended}").
  #
  # Uses `with`, like get_feed_last_update/1, so that a lookup result other
  # than {:ok, text} is handed back to the caller instead of raising
  # MatchError and aborting the whole feed parse.
  # NOTE(review): the result is consumed by Maps.put_if_ok/3 in
  # get_feed_entry/1, which presumably skips non-ok values — confirm.
  defp get_entry_updated(frag) do
    with {:ok, updated} <- XML.string_from_xpath(~s{/entry/updated/text()}, frag) do
      Timex.parse(updated, "{ISO:Extended}")
    end
  end
  # Entry content lookup. Reads the `type` attribute of /entry/content and
  # hands off to get_entry_content/2. Anything other than {:ok, type} from the
  # attribute lookup is returned to the caller unchanged.
  defp get_entry_content(frag) do
    case XML.string_from_xpath(~s[/entry/content/@type], frag) do
      {:ok, kind} -> get_entry_content(frag, kind)
      other -> other
    end
  end

  # type="html": the content's child nodes are returned as selected by xmerl,
  # tagged :html.
  defp get_entry_content(frag, "html") do
    nodes = :xmerl_xpath.string(~c{/entry/content/child::node()}, frag)
    {:ok, {:html, nodes}}
  end

  # type="xhtml": selects the child element(s) whose local name is "div",
  # tagged :xhtml.
  defp get_entry_content(frag, "xhtml") do
    nodes = :xmerl_xpath.string(~c{/entry/content/*[local-name(.)="div"]}, frag)
    {:ok, {:xhtml, nodes}}
  end

  # Any other type value: use the content's text. An empty string is reported
  # as the bare atom :empty rather than {:ok, ""}.
  defp get_entry_content(frag, _type) do
    case XML.string_from_xpath(~s[/entry/content/text()], frag) do
      {:ok, ""} -> :empty
      {:ok, text} -> {:ok, text}
      other -> other
    end
  end
  # Builds the map for a single <entry> fragment. Each field getter is run in
  # the order listed and its result is offered to Maps.put_if_ok/3 under the
  # matching key, starting from an empty map.
  defp get_feed_entry(frag) do
    fields = [
      title: get_entry_title(frag),
      id: get_entry_id(frag),
      link: get_entry_link(frag),
      description: get_entry_description(frag),
      published: get_entry_published(frag),
      updated: get_entry_updated(frag),
      content: get_entry_content(frag)
    ]

    Enum.reduce(fields, %{}, fn {key, result}, entry ->
      Maps.put_if_ok(entry, key, result)
    end)
  end
  @doc """
  Parses an Atom document into a feed map.

  On success returns `{:ok, map}` with the keys `:parser` (this module),
  `:title`, `:description`, `:id`, `:last_update` and `:entries`.

  The steps run in the order id, last update, title, description, entries.
  When a step does not yield `{:ok, value}`, parsing stops and the tagged
  tuple for that step is returned as-is: `{tag, result}`, where `tag` is one
  of `:id`, `:last_update`, `:title`, `:desc` or `:entries`.
  """
  def parse(doc) do
    with {_, {:ok, feed_id}} <- {:id, get_feed_id(doc)},
         {_, {:ok, updated_at}} <- {:last_update, get_feed_last_update(doc)},
         {_, {:ok, feed_title}} <- {:title, get_feed_title(doc)},
         {_, {:ok, feed_description}} <- {:desc, get_feed_description(doc)},
         {_, {:ok, feed_entries}} <- {:entries, get_feed_entries(doc)} do
      {:ok,
       %{
         parser: __MODULE__,
         title: feed_title,
         description: feed_description,
         id: feed_id,
         last_update: updated_at,
         entries: feed_entries
       }}
    end
  end
  100. end