atom.ex (3685B)
- # NewsParseEx: RSS/Atom parser
- # Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
- # SPDX-License-Identifier: AGPL-3.0-only
defmodule NewsParseEx.Atom do
  @moduledoc """
  Parser for Atom feeds.

  `parse/1` takes a document that can be queried with `:xmerl_xpath.string/2`
  and extracts the feed metadata and its entries into plain maps.
  """

  alias NewsParseEx.Maps
  alias NewsParseEx.XML

  # Timex format string used for every Atom timestamp read by this module.
  @timestamp_format "{ISO:Extended}"

  @doc """
  Parses an Atom document into a feed map.

  On success returns `{:ok, feed}` where `feed` has the keys:

    * `:parser` - this module
    * `:id` - text of `/feed/id`
    * `:last_update` - `/feed/updated` parsed with Timex
    * `:title` - the title lookup result, or `{:html, nodes}` / `{:xhtml, nodes}`
      when the title's `type` attribute is `"html"` / `"xhtml"`
    * `:description` - always `nil` for Atom feeds
    * `:entries` - list of entry maps, one per `/feed/entry`

  When one of the feed-level lookups does not return `{:ok, value}`, the
  parse stops and `{step, result}` is returned, where `step` names the
  failing lookup (`:id`, `:last_update`, `:title`, `:desc` or `:entries`)
  and `result` is whatever that lookup returned.
  """
  @spec parse(term()) ::
          {:ok, map()} | {:id | :last_update | :title | :desc | :entries, term()}
  def parse(doc) do
    # Each step is tagged so that a failure tells the caller which lookup broke.
    with {_, {:ok, id}} <- {:id, get_feed_id(doc)},
         {_, {:ok, last_update}} <- {:last_update, get_feed_last_update(doc)},
         {_, {:ok, title}} <- {:title, get_feed_title(doc)},
         {_, {:ok, description}} <- {:desc, get_feed_description(doc)},
         {_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do
      {:ok,
       %{
         parser: __MODULE__,
         title: title,
         description: description,
         id: id,
         last_update: last_update,
         entries: entries
       }}
    end
  end

  # Reads the feed title, dispatching on the title's `type` attribute.
  # NOTE(review): RFC 4287 treats a missing `type` as "text"; this relies on
  # XML.string_from_xpath/2 returning {:ok, _} for an absent attribute - confirm.
  defp get_feed_title(doc) do
    with {:ok, type} <- XML.string_from_xpath(~s[/feed/title/@type], doc) do
      get_feed_title(doc, type)
    end
  end

  # "html": return the title's child nodes untouched, tagged as :html.
  defp get_feed_title(doc, "html") do
    nodes = :xmerl_xpath.string(~c{/feed/title/child::node()}, doc)
    {:ok, {:html, nodes}}
  end

  # "xhtml": return the wrapping <div> element(s), tagged as :xhtml.
  defp get_feed_title(doc, "xhtml") do
    nodes = :xmerl_xpath.string(~c{/feed/title/*[local-name(.)="div"]}, doc)
    {:ok, {:xhtml, nodes}}
  end

  # Any other type: plain text lookup.
  defp get_feed_title(doc, _type), do: XML.string_from_xpath(~s[/feed/title/text()], doc)

  defp get_feed_id(doc), do: XML.string_from_xpath(~s[/feed/id/text()], doc)

  defp get_feed_last_update(doc), do: timestamp_from_xpath(~s[/feed/updated/text()], doc)

  # No feed-level description is extracted for Atom feeds.
  defp get_feed_description(_doc), do: {:ok, nil}

  # Builds one entry map per /feed/entry node; a feed without entries yields [].
  defp get_feed_entries(doc) do
    entries =
      ~c{/feed/entry}
      |> :xmerl_xpath.string(doc)
      |> Enum.map(&get_feed_entry/1)

    {:ok, entries}
  end

  # Collects the fields of a single entry node into a map.
  # NOTE(review): presumably Maps.put_if_ok/3 only stores a key when the lookup
  # returned {:ok, value}; verify against NewsParseEx.Maps.
  defp get_feed_entry(frag) do
    %{}
    |> Maps.put_if_ok(:title, get_entry_title(frag))
    |> Maps.put_if_ok(:id, get_entry_id(frag))
    |> Maps.put_if_ok(:link, get_entry_link(frag))
    |> Maps.put_if_ok(:description, get_entry_description(frag))
    |> Maps.put_if_ok(:published, get_entry_published(frag))
    |> Maps.put_if_ok(:updated, get_entry_updated(frag))
    |> Maps.put_if_ok(:content, get_entry_content(frag))
  end

  defp get_entry_title(frag), do: XML.string_from_xpath(~s{/entry/title}, frag)

  defp get_entry_description(frag), do: XML.string_from_xpath(~s{/entry/summary}, frag)

  defp get_entry_link(frag), do: XML.string_from_xpath(~s{/entry/link/@href}, frag)

  defp get_entry_id(frag), do: XML.string_from_xpath(~s{/entry/id/text()}, frag)

  # A failed lookup is passed through instead of raising MatchError, so one
  # entry without a timestamp cannot abort parsing of the whole feed.
  defp get_entry_published(frag), do: timestamp_from_xpath(~s{/entry/published/text()}, frag)

  defp get_entry_updated(frag), do: timestamp_from_xpath(~s{/entry/updated/text()}, frag)

  # Reads the entry content, dispatching on the content's `type` attribute.
  defp get_entry_content(frag) do
    with {:ok, type} <- XML.string_from_xpath(~s[/entry/content/@type], frag) do
      get_entry_content(frag, type)
    end
  end

  # "html": return the content's child nodes untouched, tagged as :html.
  defp get_entry_content(frag, "html") do
    nodes = :xmerl_xpath.string(~c{/entry/content/child::node()}, frag)
    {:ok, {:html, nodes}}
  end

  # "xhtml": return the wrapping <div> element(s), tagged as :xhtml.
  defp get_entry_content(frag, "xhtml") do
    nodes = :xmerl_xpath.string(~c{/entry/content/*[local-name(.)="div"]}, frag)
    {:ok, {:xhtml, nodes}}
  end

  # Any other type: plain text lookup; empty text is reported as :empty
  # rather than {:ok, ""}.
  defp get_entry_content(frag, _type) do
    case XML.string_from_xpath(~s[/entry/content/text()], frag) do
      {:ok, ""} -> :empty
      result -> result
    end
  end

  # Looks up the text at `xpath` under `node` and parses it as a timestamp.
  # Returns the Timex.parse/2 result, or the lookup result unchanged when the
  # lookup itself did not return {:ok, text}.
  defp timestamp_from_xpath(xpath, node) do
    with {:ok, raw} <- XML.string_from_xpath(xpath, node) do
      Timex.parse(raw, @timestamp_format)
    end
  end
end