logo

news_parse_ex

commit: 650f044102ba8c62bc7c7eaf3ff033aad7463a29
parent 18972a2a8d9a2c95a56ab13e5ee1d3c0dcafb3db
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Sun,  5 Feb 2023 11:48:09 +0100

Atom: Parse entries

Diffstat:

M lib/atom.ex | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M test/news_parse_ex_test.exs | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/lib/atom.ex b/lib/atom.ex @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule NewsParseEx.Atom do + alias NewsParseEx.Maps alias NewsParseEx.XML defp get_feed_title(doc) do @@ -28,21 +29,79 @@ defmodule NewsParseEx.Atom do defp get_feed_id(doc), do: XML.string_from_xpath(~s[/feed/id/text()], doc) defp get_feed_last_update(doc) do - {:ok, updated} = XML.string_from_xpath(~s[/feed/updated/text()], doc) - DateTime.from_iso8601(updated) + with {:ok, updated} <- XML.string_from_xpath(~s[/feed/updated/text()], doc) do + Timex.parse(updated, "{ISO:Extended}") + end end defp get_feed_description(_doc), do: {:ok, nil} - defp get_feed_entries(_doc) do - # FIXME - {:ok, []} + defp get_feed_entries(doc) do + items = :xmerl_xpath.string('/feed/entry', doc) + + if length(items) != 0 do + entries = Enum.map(items, &get_feed_entry(&1)) + {:ok, entries} + else + {:ok, []} + end + end + + defp get_entry_title(frag), do: XML.string_from_xpath(~s{/entry/title}, frag) + defp get_entry_description(frag), do: XML.string_from_xpath(~s{/entry/summary}, frag) + defp get_entry_link(frag), do: XML.string_from_xpath(~s{/entry/link/@href}, frag) + defp get_entry_id(frag), do: XML.string_from_xpath(~s{/entry/id/text()}, frag) + + defp get_entry_published(frag) do + {:ok, pub} = XML.string_from_xpath(~s{/entry/published/text()}, frag) + Timex.parse(pub, "{ISO:Extended}") + end + + defp get_entry_updated(frag) do + {:ok, pub} = XML.string_from_xpath(~s{/entry/updated/text()}, frag) + Timex.parse(pub, "{ISO:Extended}") + end + + defp get_entry_content(frag) do + with {:ok, type} <- XML.string_from_xpath(~s[/entry/content/@type], frag) do + get_entry_content(frag, type) + end + end + + defp get_entry_content(frag, "html") do + content = :xmerl_xpath.string('/entry/content/child::node()', frag) + {:ok, {:html, content}} + end + + defp get_entry_content(frag, "xhtml") do + content = :xmerl_xpath.string('/entry/content/*[local-name(.)="div"]', frag) + {:ok, {:xhtml, 
content}} + end + + defp get_entry_content(frag, _) do + with {:ok, content} <- XML.string_from_xpath(~s[/entry/content/text()], frag) do + if content == "" do + :empty + else + {:ok, content} + end + end + end + + defp get_feed_entry(frag) do + %{} + |> Maps.put_if_ok(:title, get_entry_title(frag)) + |> Maps.put_if_ok(:id, get_entry_id(frag)) + |> Maps.put_if_ok(:link, get_entry_link(frag)) + |> Maps.put_if_ok(:description, get_entry_description(frag)) + |> Maps.put_if_ok(:published, get_entry_published(frag)) + |> Maps.put_if_ok(:updated, get_entry_updated(frag)) + |> Maps.put_if_ok(:content, get_entry_content(frag)) end def parse(doc) do with {_, {:ok, id}} <- {:id, get_feed_id(doc)}, - {_, {:ok, last_update, _tz_offset}} <- - {:last_update, get_feed_last_update(doc)}, + {_, {:ok, last_update}} <- {:last_update, get_feed_last_update(doc)}, {_, {:ok, title}} <- {:title, get_feed_title(doc)}, {_, {:ok, description}} <- {:desc, get_feed_description(doc)}, {_, {:ok, entries}} <- {:entries, get_feed_entries(doc)} do diff --git a/test/news_parse_ex_test.exs b/test/news_parse_ex_test.exs @@ -23,6 +23,7 @@ defmodule NewsParseExTest do assert(parsed.description == nil) assert(parsed.id == "https://example.org/feed/") assert(parsed.last_update == ~U[2021-11-01 16:09:55Z]) + assert(parsed.entries == []) end test "Gitlab atom feed" do @@ -33,6 +34,24 @@ defmodule NewsParseExTest do assert(parsed.description == nil) assert(parsed.id == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags") assert(parsed.last_update == ~U[2021-11-01T16:09:55Z]) + + assert(length(parsed.entries) == 20) + + entry_0 = Enum.at(parsed.entries, 0) + + assert( + Map.get(entry_0, :id) == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1" + ) + + assert(Map.get(entry_0, :description) |> String.split("\n") |> length == 37) + + assert( + Map.get(entry_0, :link) == "https://gitlab.freedesktop.org/wlroots/wlroots/-/tags/0.16.1" + ) + + assert(Map.get(entry_0, :updated) == 
~U[2022-12-25T15:56:39Z]) + assert(Map.get(entry_0, :published) == nil) + assert(Map.get(entry_0, :title) == "0.16.1") end test "RFC4287 brief, single-entry Atom Feed Document" do @@ -43,6 +62,18 @@ defmodule NewsParseExTest do assert(parsed.description == nil) assert(parsed.id == "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6") assert(parsed.last_update == ~U[2003-12-13T18:30:02Z]) + + assert( + parsed.entries == [ + %{ + id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + description: "Some text.", + link: "http://example.org/2003/12/13/atom03", + updated: ~U[2003-12-13T18:30:02Z], + title: "Atom-Powered Robots Run Amok" + } + ] + ) end test "RFC4287 more extensive, single-entry Atom Feed Document" do @@ -53,6 +84,34 @@ defmodule NewsParseExTest do assert(parsed.description == nil) assert(parsed.id == "tag:example.org,2003:3") assert(parsed.last_update == ~U[2005-07-31T12:29:29Z]) + + # 2003-12-13 08:29:29-04:00 + published = %DateTime{ + year: 2003, + month: 12, + day: 13, + hour: 08, + minute: 29, + second: 29, + std_offset: 0, + utc_offset: -4 * 60 * 60, + time_zone: "Etc/UTC-4", + zone_abbr: "-04" + } + + assert( + [ + %{ + title: "Atom draft-07 snapshot", + link: "http://example.org/2005/04/02/atom", + id: "tag:example.org,2003:3.2397", + updated: ~U[2005-07-31 12:29:29Z], + published: ^published, + content: {:xhtml, _content}, + description: "" + } + ] = parsed.entries + ) end end