logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma git clone https://hacktivis.me/git/pleroma.git
commit: 72480e7b2fc19beaa8a0a524e8b1c83442debd01
parent e9573627792df4cdaea15f1ca1563594f477cd8e
Author: feld <feld@feld.me>
Date:   Wed,  7 Feb 2024 05:47:38 +0000

Merge branch 'rich-media-tests' into 'develop'

Pleroma.Web.RichMedia.Parser: Remove test-specific codepaths

See merge request pleroma/pleroma!4053

Diffstat:

Achangelog.d/rich_media_tests.skip0
Mlib/pleroma/caching.ex3+++
Mlib/pleroma/web/rich_media/helpers.ex43-------------------------------------------
Mlib/pleroma/web/rich_media/parser.ex151++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mtest/fixtures/rich_media/oembed.html2+-
Mtest/pleroma/web/mastodon_api/controllers/status_controller_test.exs8+-------
Mtest/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs1+
Mtest/pleroma/web/rich_media/helpers_test.exs4++--
Mtest/pleroma/web/rich_media/parser_test.exs105++++++++++++++-----------------------------------------------------------------
Mtest/support/cachex_proxy.ex6++++++
Mtest/support/http_request_mock.ex61++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mtest/support/null_cache.ex6++++++
12 files changed, 191 insertions(+), 199 deletions(-)

diff --git a/changelog.d/rich_media_tests.skip b/changelog.d/rich_media_tests.skip diff --git a/lib/pleroma/caching.ex b/lib/pleroma/caching.ex @@ -8,10 +8,13 @@ defmodule Pleroma.Caching do @callback put(Cachex.cache(), any(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback put(Cachex.cache(), any(), any()) :: {Cachex.status(), boolean()} @callback fetch!(Cachex.cache(), any(), function() | nil) :: any() + @callback fetch(Cachex.cache(), any(), function() | nil) :: + {atom(), any()} | {atom(), any(), any()} # @callback del(Cachex.cache(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback del(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback stream!(Cachex.cache(), any()) :: Enumerable.t() @callback expire_at(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} + @callback expire(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} @callback exists?(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback execute!(Cachex.cache(), function()) :: any() @callback get_and_update(Cachex.cache(), any(), function()) :: diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex @@ -18,53 +18,10 @@ defmodule Pleroma.Web.RichMedia.Helpers do recv_timeout: 2_000 ] - @spec validate_page_url(URI.t() | binary()) :: :ok | :error - defp validate_page_url(page_url) when is_binary(page_url) do - validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) - - page_url - |> Linkify.Parser.url?(validate_tld: validate_tld) - |> parse_uri(page_url) - end - - defp validate_page_url(%URI{host: host, scheme: "https"}) do - cond do - Linkify.Parser.ip?(host) -> - :error - - host in @config_impl.get([:rich_media, :ignore_hosts], []) -> - :error - - get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) -> - :error - - true -> - :ok - end - end - - defp validate_page_url(_), do: :error - - defp parse_uri(true, url) do - url - |> URI.parse() - |> validate_page_url - end - - defp parse_uri(_, _), do: :error - - defp get_tld(host) do - host - |> String.split(".") - |> Enum.reverse() - |> hd - end - def fetch_data_for_object(object) do with true <- @config_impl.get([:rich_media, :enabled]), {:ok, page_url} <- HTML.extract_first_external_url_from_object(object), - :ok <- validate_page_url(page_url), {:ok, rich_media} <- Parser.parse(page_url) do %{page_url: page_url, rich_media: rich_media} else diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex @@ -6,6 +6,7 @@ defmodule Pleroma.Web.RichMedia.Parser do require Logger @cachex Pleroma.Config.get([:cachex, :provider], Cachex) + @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config) defp parsers do Pleroma.Config.get([:rich_media, :parsers]) @@ -13,70 +14,66 @@ defmodule Pleroma.Web.RichMedia.Parser do def parse(nil), do: {:error, "No URL provided"} - if Pleroma.Config.get(:env) == :test do - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url), do: parse_url(url) - else - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url) do - with {:ok, data} <- get_cached_or_parse(url), - {:ok, _} <- set_ttl_based_on_image(data, url) do - {:ok, data} - end + @spec parse(String.t()) :: {:ok, map()} | {:error, any()} + def parse(url) do + with :ok <- validate_page_url(url), + {:ok, data} <- get_cached_or_parse(url), + {:ok, _} <- set_ttl_based_on_image(data, url) do + {:ok, data} end + end - defp get_cached_or_parse(url) do - case @cachex.fetch(:rich_media_cache, url, fn -> - case parse_url(url) do - {:ok, _} = res -> - {:commit, res} - - {:error, reason} = e -> - # Unfortunately we have to log errors here, instead of doing that - # along with ttl setting at the bottom. Otherwise we can get log spam - # if more than one process was waiting for the rich media card - # while it was generated. Ideally we would set ttl here as well, - # so we don't override it number_of_waiters_on_generation - # times, but one, obviously, can't set ttl for not-yet-created entry - # and Cachex doesn't support returning ttl from the fetch callback. - log_error(url, reason) - {:commit, e} - end - end) do - {action, res} when action in [:commit, :ok] -> - case res do - {:ok, _data} = res -> - res - - {:error, reason} = e -> - if action == :commit, do: set_error_ttl(url, reason) - e - end - - {:error, e} -> - {:error, {:cachex_error, e}} - end + defp get_cached_or_parse(url) do + case @cachex.fetch(:rich_media_cache, url, fn -> + case parse_url(url) do + {:ok, _} = res -> + {:commit, res} + + {:error, reason} = e -> + # Unfortunately we have to log errors here, instead of doing that + # along with ttl setting at the bottom. Otherwise we can get log spam + # if more than one process was waiting for the rich media card + # while it was generated. Ideally we would set ttl here as well, + # so we don't override it number_of_waiters_on_generation + # times, but one, obviously, can't set ttl for not-yet-created entry + # and Cachex doesn't support returning ttl from the fetch callback. + log_error(url, reason) + {:commit, e} + end + end) do + {action, res} when action in [:commit, :ok] -> + case res do + {:ok, _data} = res -> + res + + {:error, reason} = e -> + if action == :commit, do: set_error_ttl(url, reason) + e + end + + {:error, e} -> + {:error, {:cachex_error, e}} end + end - defp set_error_ttl(_url, :body_too_large), do: :ok - defp set_error_ttl(_url, {:content_type, _}), do: :ok + defp set_error_ttl(_url, :body_too_large), do: :ok + defp set_error_ttl(_url, {:content_type, _}), do: :ok - # The TTL is not set for the errors above, since they are unlikely to change - # with time + # The TTL is not set for the errors above, since they are unlikely to change + # with time - defp set_error_ttl(url, _reason) do - ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) - @cachex.expire(:rich_media_cache, url, ttl) - :ok - end + defp set_error_ttl(url, _reason) do + ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) + @cachex.expire(:rich_media_cache, url, ttl) + :ok + end - defp log_error(url, {:invalid_metadata, data}) do - Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) - end + defp log_error(url, {:invalid_metadata, data}) do + Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) + end - defp log_error(url, reason) do - Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) - end + defp log_error(url, reason) do + Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) end @doc """ @@ -166,4 +163,46 @@ defmodule Pleroma.Web.RichMedia.Parser do end) |> Map.new() end + + @spec validate_page_url(URI.t() | binary()) :: :ok | :error + defp validate_page_url(page_url) when is_binary(page_url) do + validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) + + page_url + |> Linkify.Parser.url?(validate_tld: validate_tld) + |> parse_uri(page_url) + end + + defp validate_page_url(%URI{host: host, scheme: "https"}) do + cond do + Linkify.Parser.ip?(host) -> + :error + + host in @config_impl.get([:rich_media, :ignore_hosts], []) -> + :error + + get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) -> + :error + + true -> + :ok + end + end + + defp validate_page_url(_), do: :error + + defp parse_uri(true, url) do + url + |> URI.parse() + |> validate_page_url + end + + defp parse_uri(_, _), do: :error + + defp get_tld(host) do + host + |> String.split(".") + |> Enum.reverse() + |> hd + end end diff --git a/test/fixtures/rich_media/oembed.html b/test/fixtures/rich_media/oembed.html @@ -1,3 +1,3 @@ <link rel="alternate" type="application/json+oembed" - href="http://example.com/oembed.json" + href="https://example.com/oembed.json" title="Bacon Lollys oEmbed Profile" /> diff --git a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs @@ -336,13 +336,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do path -> Pleroma.Test.StaticConfig.get(path) end) - Tesla.Mock.mock(fn - %{ - method: :get, - url: "https://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - + Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs @@ -49,6 +49,7 @@ defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do :chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"} cache, key -> NullCache.get(cache, key) end) + |> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end) chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref) diff --git a/test/pleroma/web/rich_media/helpers_test.exs b/test/pleroma/web/rich_media/helpers_test.exs @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.HelpersTest do - use Pleroma.DataCase, async: true + use Pleroma.DataCase, async: false alias Pleroma.StaticStubbedConfigMock, as: ConfigMock alias Pleroma.Web.CommonAPI @@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do import Tesla.Mock setup do - mock(fn env -> apply(HttpRequestMock, :request, [env]) end) + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) ConfigMock |> stub(:get, fn diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs @@ -3,95 +3,26 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.ParserTest do - use ExUnit.Case, async: true + use Pleroma.DataCase, async: false alias Pleroma.Web.RichMedia.Parser - setup do - Tesla.Mock.mock(fn - %{ - method: :get, - url: "http://example.com/ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} - - %{ - method: :get, - url: "http://example.com/non-ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")} - - %{ - method: :get, - url: "http://example.com/ogp-missing-title" - } -> - %Tesla.Env{ - status: 200, - body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") - } - - %{ - method: :get, - url: "http://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - - %{ - method: :get, - url: "http://example.com/oembed" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")} - - %{ - method: :get, - url: "http://example.com/oembed.json" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")} - - %{method: :get, url: "http://example.com/empty"} -> - %Tesla.Env{status: 200, body: "hello"} + import Tesla.Mock - %{method: :get, url: "http://example.com/malformed"} -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")} - - %{method: :get, url: "http://example.com/error"} -> - {:error, :overload} - - %{ - method: :head, - url: "http://example.com/huge-page" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] - } - - %{ - method: :head, - url: "http://example.com/pdf-file" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] - } - - %{method: :head} -> - %Tesla.Env{status: 404, body: "", headers: []} - end) - - :ok + setup do + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) end test "returns error when no metadata present" do - assert {:error, _} = Parser.parse("http://example.com/empty") + assert {:error, _} = Parser.parse("https://example.com/empty") end test "doesn't just add a title" do - assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp") + assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp") end test "parses ogp" do - assert Parser.parse("http://example.com/ogp") == + assert Parser.parse("https://example.com/ogp") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -99,12 +30,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp" + "url" => "https://example.com/ogp" }} end test "falls back to <title> when ogp:title is missing" do - assert Parser.parse("http://example.com/ogp-missing-title") == + assert Parser.parse("https://example.com/ogp-missing-title") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -112,12 +43,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp-missing-title" + "url" => "https://example.com/ogp-missing-title" }} end test "parses twitter card" do - assert Parser.parse("http://example.com/twitter-card") == + assert Parser.parse("https://example.com/twitter-card") == {:ok, %{ "card" => "summary", @@ -125,12 +56,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "title" => "Small Island Developing States Photo Submission", "description" => "View the album on Flickr.", - "url" => "http://example.com/twitter-card" + "url" => "https://example.com/twitter-card" }} end test "parses OEmbed and filters HTML tags" do - assert Parser.parse("http://example.com/oembed") == + assert Parser.parse("https://example.com/oembed") == {:ok, %{ "author_name" => "\u202E\u202D\u202Cbees\u202C", @@ -150,7 +81,7 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "thumbnail_width" => 150, "title" => "Bacon Lollys", "type" => "photo", - "url" => "http://example.com/oembed", + "url" => "https://example.com/oembed", "version" => "1.0", "web_page" => "https://www.flickr.com/photos/bees/2362225867/", "web_page_short_url" => "https://flic.kr/p/4AK2sc", @@ -159,18 +90,18 @@ defmodule Pleroma.Web.RichMedia.ParserTest do end test "rejects invalid OGP data" do - assert {:error, _} = Parser.parse("http://example.com/malformed") + assert {:error, _} = Parser.parse("https://example.com/malformed") end test "returns error if getting page was not successful" do - assert {:error, :overload} = Parser.parse("http://example.com/error") + assert {:error, :overload} = Parser.parse("https://example.com/error") end test "does a HEAD request to check if the body is too large" do - assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page") + assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page") end test "does a HEAD request to check if the body is html" do - assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file") + assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file") end end diff --git a/test/support/cachex_proxy.ex b/test/support/cachex_proxy.ex @@ -27,9 +27,15 @@ defmodule Pleroma.CachexProxy do defdelegate fetch!(cache, key, func), to: Cachex @impl true + defdelegate fetch(cache, key, func), to: Cachex + + @impl true defdelegate expire_at(cache, str, num), to: Cachex @impl true + defdelegate expire(cache, str, num), to: Cachex + + @impl true defdelegate exists?(cache, key), to: Cachex @impl true diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex @@ -1059,7 +1059,7 @@ defmodule HttpRequestMock do }} end - def get("http://example.com/malformed", _, _, _) do + def get("https://example.com/malformed", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} end @@ -1472,6 +1472,37 @@ defmodule HttpRequestMock do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}} end + def get("https://example.com/error", _, _, _), do: {:error, :overload} + + def get("https://example.com/ogp-missing-title", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") + }} + end + + def get("https://example.com/oembed", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}} + end + + def get("https://example.com/oembed.json", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}} + end + + def get("https://example.com/twitter-card", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}} + end + + def get("https://example.com/non-ogp", _, _, _) do + {:ok, + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}} + end + + def get("https://example.com/empty", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: "hello"}} + end + def get(url, query, body, headers) do {:error, "Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} @@ -1545,17 +1576,41 @@ defmodule HttpRequestMock do # Most of the rich media mocks are missing HEAD requests, so we just return 404. @rich_media_mocks [ + "https://example.com/empty", + "https://example.com/error", + "https://example.com/malformed", + "https://example.com/non-ogp", + "https://example.com/oembed", + "https://example.com/oembed.json", "https://example.com/ogp", "https://example.com/ogp-missing-data", + "https://example.com/ogp-missing-title", "https://example.com/twitter-card", "https://google.com/", - "https://yahoo.com/", - "https://pleroma.local/notice/9kCP7V" + "https://pleroma.local/notice/9kCP7V", + "https://yahoo.com/" ] + def head(url, _query, _body, _headers) when url in @rich_media_mocks do {:ok, %Tesla.Env{status: 404, body: ""}} end + def head("https://example.com/pdf-file", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] + }} + end + + def head("https://example.com/huge-page", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] + }} + end + def head(url, query, body, headers) do {:error, "Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} diff --git a/test/support/null_cache.ex b/test/support/null_cache.ex @@ -29,6 +29,9 @@ defmodule Pleroma.NullCache do end @impl true + def fetch(_, key, func), do: func.(key) + + @impl true def get_and_update(_, _, func) do func.(nil) end @@ -37,6 +40,9 @@ defmodule Pleroma.NullCache do def expire_at(_, _, _), do: {:ok, true} @impl true + def expire(_, _, _), do: {:ok, true} + + @impl true def exists?(_, _), do: {:ok, false} @impl true