commit: 1f3340d3604fa37fa5f2812c7bfa87b5ae856ba7
parent 96f57d300aefb7dec1edc982e423634dc0b67fbf
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Sat, 4 Feb 2023 12:16:53 +0100
Split Atom and RSS2.0 in two modules
Diffstat:
A | lib/atom.ex | 61 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | lib/news_parse_ex.ex | 110 | +++++++------------------------------------------------------------------------ |
A | lib/rss2_0.ex | 66 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 136 insertions(+), 101 deletions(-)
diff --git a/lib/atom.ex b/lib/atom.ex
@@ -0,0 +1,83 @@
+# NewsParseEx: RSS/Atom parser
+# Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule NewsParseEx.Atom do
+  @moduledoc """
+  Feed-level parser for Atom documents.
+
+  `parse/1` receives the document once `NewsParseEx` has identified its root as an
+  Atom `<feed>`. Entry extraction is not implemented yet, so `:entries` is always
+  an empty list.
+  """
+
+  alias NewsParseEx.XML
+
+  @doc """
+  Extracts the feed-level fields from an already-parsed Atom document.
+
+  Returns `{:ok, map}` with the keys `:parser`, `:title`, `:description`, `:id`,
+  `:last_update` and `:entries`. When a step fails, its result is returned tagged
+  with the step name: `{:id | :last_update | :title | :desc | :entries, result}`.
+  """
+  @spec parse(term()) :: {:ok, map()} | {atom(), term()}
+  def parse(doc) do
+    with {_, {:ok, id}} <- {:id, get_feed_id(doc)},
+         {_, {:ok, last_update, _tz_offset}} <-
+           {:last_update, get_feed_last_update(doc)},
+         {_, {:ok, title}} <- {:title, get_feed_title(doc)},
+         {_, {:ok, description}} <- {:desc, get_feed_description(doc)},
+         {_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do
+      {:ok,
+       %{
+         parser: __MODULE__,
+         title: title,
+         description: description,
+         id: id,
+         last_update: last_update,
+         entries: entries
+       }}
+    end
+  end
+
+  # Reads the title's `type` attribute, then dispatches on it.
+  defp get_feed_title(doc) do
+    with {:ok, type} <- XML.string_from_xpath(~s[/feed/title/@type], doc) do
+      get_feed_title(doc, type)
+    end
+  end
+
+  # "html" and "xhtml" titles are returned as tagged raw xmerl XPath results.
+  defp get_feed_title(doc, "html") do
+    title = :xmerl_xpath.string(~c{/feed/title/child::node()}, doc)
+    {:ok, {:html, title}}
+  end
+
+  defp get_feed_title(doc, "xhtml") do
+    title = :xmerl_xpath.string(~c{/feed/title/*[local-name(.)="div"]}, doc)
+    {:ok, {:xhtml, title}}
+  end
+
+  # Any other type value is read as plain text.
+  defp get_feed_title(doc, _type) do
+    XML.string_from_xpath(~s[/feed/title/text()], doc)
+  end
+
+  defp get_feed_id(doc), do: XML.string_from_xpath(~s[/feed/id/text()], doc)
+
+  # Uses `with` rather than an assertive match so a failed lookup is returned to
+  # `parse/1` as a value instead of raising `MatchError`.
+  defp get_feed_last_update(doc) do
+    with {:ok, updated} <- XML.string_from_xpath(~s[/feed/updated/text()], doc) do
+      DateTime.from_iso8601(updated)
+    end
+  end
+
+  # Nothing is read from the document for the description.
+  defp get_feed_description(_doc), do: {:ok, nil}
+
+  defp get_feed_entries(_doc) do
+    # FIXME
+    {:ok, []}
+  end
+end
diff --git a/lib/news_parse_ex.ex b/lib/news_parse_ex.ex
@@ -4,132 +4,40 @@
defmodule NewsParseEx do
alias NewsParseEx.XML
- alias NewsParseEx.Maps
- defp get_feed_type(doc) do
+ defp get_feed_parser(doc) do
with {:ok, root_name} <- XML.string_from_xpath(~s[name()], doc) do
- get_feed_type(doc, root_name)
+ get_feed_parser(doc, root_name)
end
end
- defp get_feed_type(doc, "feed") do
+ defp get_feed_parser(doc, "feed") do
with {:ok, namespace} <- XML.string_from_xpath(~s{/feed/namespace::*[name()='']}, doc) do
if namespace == "http://www.w3.org/2005/Atom" do
- {:ok, :atom}
+ {:ok, NewsParseEx.Atom}
else
{:error, "Atom feed with wrong root namespace: #{namespace}"}
end
end
end
- defp get_feed_type(doc, "rss") do
+ defp get_feed_parser(doc, "rss") do
with {:ok, version} <- XML.string_from_xpath(~s{/rss/@version}, doc) do
case version do
- "2.0" -> {:ok, :rss2_0}
+ "2.0" -> {:ok, NewsParseEx.RSS2_0}
version -> {:error, "RSS with unknown version: #{version}"}
end
end
end
- defp get_feed_type(_doc, root_name) do
+ defp get_feed_parser(_doc, root_name) do
{:error, "XML root isn't <feed> but <#{root_name}>"}
end
- defp get_feed_title(doc, :rss2_0), do: XML.string_from_xpath(~s[/rss/channel/title/text()], doc)
-
- defp get_feed_title(doc, :atom) do
- with {:ok, type} <- XML.string_from_xpath(~s[/feed/title/@type], doc) do
- get_feed_title(doc, :atom, type)
- end
- end
-
- defp get_feed_title(doc, :atom, "html") do
- title = :xmerl_xpath.string('/feed/title/child::node()', doc)
- {:ok, {:html, title}}
- end
-
- defp get_feed_title(doc, :atom, "xhtml") do
- title = :xmerl_xpath.string('/feed/title/*[local-name(.)="div"]', doc)
- {:ok, {:xhtml, title}}
- end
-
- defp get_feed_title(doc, :atom, _) do
- XML.string_from_xpath(~s[/feed/title/text()], doc)
- end
-
- defp get_feed_id(doc, :atom), do: XML.string_from_xpath(~s[/feed/id/text()], doc)
- defp get_feed_id(doc, :rss2_0), do: XML.string_from_xpath(~s[/rss/channel/link/text()], doc)
-
- defp get_feed_last_update(doc, :atom) do
- {:ok, updated} = XML.string_from_xpath(~s[/feed/updated/text()], doc)
- DateTime.from_iso8601(updated)
- end
-
- defp get_feed_last_update(_doc, :rss2_0), do: {:ok, nil, nil}
-
- defp get_feed_description(_doc, :atom), do: {:ok, nil}
-
- defp get_feed_description(doc, :rss2_0),
- do: XML.string_from_xpath(~s[/rss/channel/description/text()], doc)
-
- defp get_feed_entries(_doc, :atom) do
- # FIXME
- {:ok, []}
- end
-
- defp get_feed_entries(doc, :rss2_0) do
- items = :xmerl_xpath.string('/rss/channel/item', doc)
-
- if length(items) != 0 do
- entries = Enum.map(items, &get_feed_entry(&1, :rss2_0))
- {:ok, entries}
- else
- {:ok, []}
- end
- end
-
- defp get_entry_title(frag, :rss2_0), do: XML.string_from_xpath(~s{/item/title}, frag)
-
- defp get_entry_description(frag, :rss2_0),
- do: XML.string_from_xpath(~s{/item/description}, frag)
-
- defp get_entry_link(frag, :rss2_0), do: XML.string_from_xpath(~s{/item/link}, frag)
- defp get_entry_id(frag, :rss2_0), do: XML.string_from_xpath(~s{/item/guid}, frag)
-
- defp get_entry_published(frag, :rss2_0) do
- with {:ok, pubDate} <- XML.string_from_xpath(~s{/item/pubDate}, frag) do
- Calendar.DateTime.Parse.rfc822_utc(pubDate)
- end
- end
-
- defp get_feed_entry(frag, :rss2_0) do
- %{}
- |> Maps.put_if_ok(:title, get_entry_title(frag, :rss2_0))
- |> Maps.put_if_ok(:id, get_entry_id(frag, :rss2_0))
- |> Maps.put_if_ok(:link, get_entry_link(frag, :rss2_0))
- |> Maps.put_if_ok(:published, get_entry_published(frag, :rss2_0))
- |> Maps.put_if_ok(:description, get_entry_description(frag, :rss2_0))
- end
-
def parse(str) when is_bitstring(str) do
with {_, {:ok, doc}} <- {:parse, XML.parse_document(str)},
- {_, {:ok, feed_type}} <- {:type, get_feed_type(doc)},
- {_, {:ok, id}} <- {:id, get_feed_id(doc, feed_type)},
- {_, {:ok, last_update, _tz_offset}} <-
- {:last_update, get_feed_last_update(doc, feed_type)},
- {_, {:ok, title}} <- {:title, get_feed_title(doc, feed_type)},
- {_, {:ok, description}} <- {:desc, get_feed_description(doc, feed_type)},
- {_, {:ok, entries}} <- {:entries, get_feed_entries(doc, feed_type)} do
- data = %{
- :type => feed_type,
- :title => title,
- :description => description,
- :id => id,
- :last_update => last_update,
- :entries => entries
- }
-
- {:ok, data}
+ {_, {:ok, parser}} <- {:parser, get_feed_parser(doc)} do
+ parser.parse(doc)
end
end
end
diff --git a/lib/rss2_0.ex b/lib/rss2_0.ex
@@ -0,0 +1,81 @@
+# NewsParseEx: RSS/Atom parser
+# Copyright © 2022-2023 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule NewsParseEx.RSS2_0 do
+  @moduledoc """
+  Feed-level and item-level parser for RSS 2.0 documents.
+
+  `parse/1` receives the document once `NewsParseEx` has identified its root as
+  `<rss version="2.0">`.
+  """
+
+  alias NewsParseEx.Maps
+  alias NewsParseEx.XML
+
+  @doc """
+  Extracts the channel fields and items from an already-parsed RSS 2.0 document.
+
+  Returns `{:ok, map}` with the keys `:parser`, `:title`, `:description`, `:id`,
+  `:last_update` and `:entries`. The channel `<link>` text is used as `:id` and
+  `:last_update` is always `nil`. When a step fails, its result is returned tagged
+  with the step name: `{:id | :last_update | :title | :desc | :entries, result}`.
+  """
+  @spec parse(term()) :: {:ok, map()} | {atom(), term()}
+  def parse(doc) do
+    with {_, {:ok, id}} <- {:id, get_feed_id(doc)},
+         {_, {:ok, last_update, _tz_offset}} <-
+           {:last_update, get_feed_last_update(doc)},
+         {_, {:ok, title}} <- {:title, get_feed_title(doc)},
+         {_, {:ok, description}} <- {:desc, get_feed_description(doc)},
+         {_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do
+      {:ok,
+       %{
+         parser: __MODULE__,
+         title: title,
+         description: description,
+         id: id,
+         last_update: last_update,
+         entries: entries
+       }}
+    end
+  end
+
+  defp get_feed_title(doc), do: XML.string_from_xpath(~s[/rss/channel/title/text()], doc)
+  defp get_feed_id(doc), do: XML.string_from_xpath(~s[/rss/channel/link/text()], doc)
+
+  # RSS 2.0 feeds get no feed-level update time here; the three-element tuple is
+  # the shape `parse/1` matches for `:last_update`.
+  defp get_feed_last_update(_doc), do: {:ok, nil, nil}
+
+  defp get_feed_description(doc),
+    do: XML.string_from_xpath(~s[/rss/channel/description/text()], doc)
+
+  # `Enum.map/2` returns `[]` for an empty list, so no emptiness check is needed.
+  defp get_feed_entries(doc) do
+    items = :xmerl_xpath.string(~c[/rss/channel/item], doc)
+    {:ok, Enum.map(items, &get_feed_entry/1)}
+  end
+
+  # Builds one entry map from an `<item>` node.
+  # NOTE(review): `Maps.put_if_ok/3` presumably skips non-`{:ok, _}` results — confirm.
+  defp get_feed_entry(frag) do
+    %{}
+    |> Maps.put_if_ok(:title, get_entry_title(frag))
+    |> Maps.put_if_ok(:id, get_entry_id(frag))
+    |> Maps.put_if_ok(:link, get_entry_link(frag))
+    |> Maps.put_if_ok(:published, get_entry_published(frag))
+    |> Maps.put_if_ok(:description, get_entry_description(frag))
+  end
+
+  defp get_entry_title(frag), do: XML.string_from_xpath(~s{/item/title}, frag)
+  defp get_entry_description(frag), do: XML.string_from_xpath(~s{/item/description}, frag)
+  defp get_entry_link(frag), do: XML.string_from_xpath(~s{/item/link}, frag)
+  defp get_entry_id(frag), do: XML.string_from_xpath(~s{/item/guid}, frag)
+
+  defp get_entry_published(frag) do
+    with {:ok, pub_date} <- XML.string_from_xpath(~s{/item/pubDate}, frag) do
+      Calendar.DateTime.Parse.rfc822_utc(pub_date)
+    end
+  end
+end