commit: fd59c289e863d304403c694419d17bff4927373c
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Mon, 26 Dec 2022 15:29:38 +0100
init
Diffstat:
8 files changed, 151 insertions(+), 0 deletions(-)
diff --git a/.formatter.exs b/.formatter.exs
@@ -0,0 +1,4 @@
+# Used by "mix format": `inputs` lists the files and glob patterns to format.
+[
+ inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
+]
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+/_build/
+/cover/
+/deps/
+/doc/
+/.fetch
+erl_crash.dump
+*.ez
+news_parse_ex-*.tar
+/tmp/
diff --git a/README.md b/README.md
@@ -0,0 +1,3 @@
+# NewsParseEx
+
+Library to parse RSS/Atom news feeds
diff --git a/lib/news_parse_ex.ex b/lib/news_parse_ex.ex
@@ -0,0 +1,100 @@
+# NewsParseEx: RSS/Atom parser
+# Copyright © 2022 Haelwenn (lanodan) Monnier <contact+news_parse_ex@hacktivis.me>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule NewsParseEx do
+  @moduledoc """
+  Parses news feeds given as XML strings into plain maps.
+
+  Only Atom feeds (a `<feed>` root in the `http://www.w3.org/2005/Atom`
+  namespace) are recognised.
+  """
+
+  alias NewsParseEx.XML
+
+  @atom_namespace "http://www.w3.org/2005/Atom"
+
+  @doc """
+  Detects the feed type of a parsed XML document.
+
+  Returns `{:ok, :atom}` when the root element is `<feed>` and the namespace
+  found on it is the Atom one. Otherwise returns `{:error, reason}`: `reason`
+  is a message naming the unexpected root element, or the namespace that was
+  found instead (`nil` when there is none).
+  """
+  def get_feed_type(doc) do
+    # Branch on the root name so that a non-<feed> root really returns its
+    # error tuple; a bare `if` here would compute the tuple and discard it.
+    case XML.string_from_xpath(~s[name()], doc) do
+      "feed" -> atom_feed_type(doc)
+      root_name -> {:error, "XML root isn't <feed> but #{root_name}"}
+    end
+  end
+
+  # Checks the namespace of an already confirmed <feed> root.
+  defp atom_feed_type(doc) do
+    case XML.string_from_xpath(~s[/feed/namespace::*], doc) do
+      @atom_namespace -> {:ok, :atom}
+      other -> {:error, other}
+    end
+  end
+
+  @doc """
+  Returns `{:ok, title}` with the text of the feed's `<title>` element.
+
+  `title` is `nil` when the element is missing or blank.
+  """
+  def get_feed_title(doc, :atom), do: {:ok, XML.string_from_xpath(~s[/feed/title/text()], doc)}
+
+  @doc """
+  Returns `{:ok, id}` with the text of the feed's `<id>` element.
+
+  `id` is `nil` when the element is missing or blank.
+  """
+  def get_feed_id(doc, :atom), do: {:ok, XML.string_from_xpath(~s[/feed/id/text()], doc)}
+
+  @doc """
+  Parses the feed's `<updated>` element as an ISO 8601 timestamp.
+
+  Returns what `DateTime.from_iso8601/1` returns for the element's text, i.e.
+  `{:ok, datetime, utc_offset}` or `{:error, reason}`. A missing or blank
+  element gives `{:error, :missing_updated}`.
+  """
+  def get_feed_last_update(doc, :atom) do
+    # The XPath helper returns nil when nothing is found, and
+    # DateTime.from_iso8601/1 only accepts binaries, so handle nil here.
+    case XML.string_from_xpath(~s[/feed/updated/text()], doc) do
+      nil -> {:error, :missing_updated}
+      updated -> DateTime.from_iso8601(updated)
+    end
+  end
+
+  @doc """
+  Parses a feed given as an XML string.
+
+  Returns `{:ok, feed}` where `feed` is a map with the keys `:type`, `:title`,
+  `:id`, `:last_update` and `:entries` (currently always `[]`).
+
+  When a step fails, its result is returned tagged with the name of the step
+  (`:parse`, `:type`, `:title`, `:id` or `:last_update`), for example
+  `{:parse, :error}` or `{:type, {:error, reason}}`.
+  """
+  def parse(str) when is_bitstring(str) do
+    with {_, {:ok, doc}} <- {:parse, XML.parse_document(str)},
+         {_, {:ok, feed_type}} <- {:type, get_feed_type(doc)},
+         {_, {:ok, title}} <- {:title, get_feed_title(doc, feed_type)},
+         {_, {:ok, id}} <- {:id, get_feed_id(doc, feed_type)},
+         {_, {:ok, last_update, _tz_offset}} <-
+           {:last_update, get_feed_last_update(doc, feed_type)} do
+      feed = %{
+        type: feed_type,
+        title: title,
+        id: id,
+        last_update: last_update,
+        entries: []
+      }
+
+      {:ok, feed}
+    end
+  end
+end
diff --git a/lib/xml.ex b/lib/xml.ex
@@ -0,0 +1,64 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule NewsParseEx.XML do
+  @moduledoc """
+  Thin helpers around Erlang's `:xmerl` for parsing XML and reading strings
+  out of a parsed document with XPath.
+  """
+
+  require Logger
+
+  @doc """
+  Evaluates `string(xpath)` against `doc` and returns the trimmed result.
+
+  Returns `nil` when `doc` is `:error` (the failure value of
+  `parse_document/1`), when the result is empty after trimming, or when the
+  evaluation throws.
+  """
+  def string_from_xpath(_, :error), do: nil
+
+  def string_from_xpath(xpath, doc) do
+    # :xmerl_xpath wants a charlist. The ~c sigil is used because single-quoted
+    # charlist literals are deprecated in newer Elixir releases.
+    {:xmlObj, :string, res} = :xmerl_xpath.string(~c"string(#{xpath})", doc)
+
+    case res |> to_string() |> String.trim() do
+      "" -> nil
+      value -> value
+    end
+  catch
+    # NOTE(review): this clause only traps throws; errors (such as a failed
+    # match above) and exits still propagate — confirm that is intended.
+    _e ->
+      Logger.debug("Couldn't find xpath #{xpath} in XML doc")
+      nil
+  end
+
+  @doc """
+  Parses `text` (a binary holding an XML document) with `:xmerl_scan`.
+
+  Returns `{:ok, doc}` on success, or `:error` when scanning raises or exits;
+  the failure is logged at debug level.
+  """
+  def parse_document(text) do
+    # NOTE(review): feeds are presumably untrusted input, and xmerl turns
+    # element and attribute names into atoms — consider limiting input size and
+    # entity handling before exposing this to arbitrary documents.
+    {doc, _rest} =
+      text
+      |> :binary.bin_to_list()
+      |> :xmerl_scan.string(quiet: true)
+
+    {:ok, doc}
+  rescue
+    _e ->
+      Logger.debug("Couldn't parse XML: #{inspect(text)}")
+      :error
+  catch
+    :exit, _error ->
+      Logger.debug("Couldn't parse XML: #{inspect(text)}")
+      :error
+  end
+end
diff --git a/mix.exs b/mix.exs
@@ -0,0 +1,24 @@
+defmodule NewsParseEx.MixProject do
+  use Mix.Project
+
+  # Project definition read by Mix: application name, version, required
+  # Elixir version and dependencies.
+  def project do
+    [
+      app: :news_parse_ex,
+      version: "0.1.0",
+      elixir: "~> 1.14",
+      start_permanent: Mix.env() == :prod,
+      deps: deps()
+    ]
+  end
+
+  # Run "mix help compile.app" to learn about applications.
+  def application, do: [extra_applications: extra_applications()]
+
+  # Applications listed under :extra_applications.
+  defp extra_applications, do: [:logger, :xmerl]
+
+  # The project has no dependencies yet.
+  defp deps, do: []
+end
diff --git a/test/news_parse_ex_test.exs b/test/news_parse_ex_test.exs
@@ -0,0 +1,31 @@
+defmodule NewsParseExTest do
+  use ExUnit.Case, async: true
+  doctest NewsParseEx
+
+  describe "parse/1" do
+    test "parses basic Atom feed" do
+      feed = ~s[<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Test Title</title>
+ <id>https://example.org/feed/</id>
+ <updated>2021-11-01T16:09:55Z</updated>
+</feed>]
+
+      assert {:ok, parsed} = NewsParseEx.parse(feed)
+      assert parsed.type == :atom
+      assert parsed.title == "Test Title"
+      assert parsed.id == "https://example.org/feed/"
+      assert parsed.last_update == ~U[2021-11-01 16:09:55Z]
+      assert parsed.entries == []
+    end
+
+    # Failures come back tagged with the step that failed.
+    test "tags malformed XML with :parse" do
+      assert {:parse, :error} = NewsParseEx.parse("<feed>")
+    end
+
+    test "tags a non-feed root with :type" do
+      assert {:type, {:error, _reason}} = NewsParseEx.parse("<rss></rss>")
+    end
+  end
+end
diff --git a/test/test_helper.exs b/test/test_helper.exs
@@ -0,0 +1 @@
+ExUnit.start()