logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma — git clone https://hacktivis.me/git/pleroma.git
commit: 9f2319e50dc0516bde4bfa3b117ec4792e553bd2
parent 6b7b443ff95587b33f4b666e68ed82dc6fb485a5
Author: Mark Felder <feld@feld.me>
Date:   Tue,  6 Feb 2024 16:54:52 -0500

RichMedia.Helpers: move the validate_page_url/1 function to the Parser module

This will ensure that the page validation happens in Parser.parse/1 so it can be called from anywhere and still filter invalid URLs.

Diffstat:

A changelog.d/rich_media_tests.skip | 0
M lib/pleroma/web/rich_media/helpers.ex | 43 -------------------------------------------
M lib/pleroma/web/rich_media/parser.ex | 46 +++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/changelog.d/rich_media_tests.skip b/changelog.d/rich_media_tests.skip
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex
@@ -18,53 +18,10 @@ defmodule Pleroma.Web.RichMedia.Helpers do
     recv_timeout: 2_000
   ]
 
-  @spec validate_page_url(URI.t() | binary()) :: :ok | :error
-  defp validate_page_url(page_url) when is_binary(page_url) do
-    validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])
-
-    page_url
-    |> Linkify.Parser.url?(validate_tld: validate_tld)
-    |> parse_uri(page_url)
-  end
-
-  defp validate_page_url(%URI{host: host, scheme: "https"}) do
-    cond do
-      Linkify.Parser.ip?(host) ->
-        :error
-
-      host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
-        :error
-
-      get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) ->
-        :error
-
-      true ->
-        :ok
-    end
-  end
-
-  defp validate_page_url(_), do: :error
-
-  defp parse_uri(true, url) do
-    url
-    |> URI.parse()
-    |> validate_page_url
-  end
-
-  defp parse_uri(_, _), do: :error
-
-  defp get_tld(host) do
-    host
-    |> String.split(".")
-    |> Enum.reverse()
-    |> hd
-  end
-
   def fetch_data_for_object(object) do
     with true <- @config_impl.get([:rich_media, :enabled]),
          {:ok, page_url} <-
            HTML.extract_first_external_url_from_object(object),
-         :ok <- validate_page_url(page_url),
          {:ok, rich_media} <- Parser.parse(page_url) do
       %{page_url: page_url, rich_media: rich_media}
     else
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
@@ -6,6 +6,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
   require Logger
 
   @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
+  @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
 
   defp parsers do
     Pleroma.Config.get([:rich_media, :parsers])
@@ -15,7 +16,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
 
   @spec parse(String.t()) :: {:ok, map()} | {:error, any()}
   def parse(url) do
-    with {:ok, data} <- get_cached_or_parse(url),
+    with :ok <- validate_page_url(url),
+         {:ok, data} <- get_cached_or_parse(url),
          {:ok, _} <- set_ttl_based_on_image(data, url) do
       {:ok, data}
     end
@@ -161,4 +163,46 @@ defmodule Pleroma.Web.RichMedia.Parser do
     end)
     |> Map.new()
   end
+
+  @spec validate_page_url(URI.t() | binary()) :: :ok | :error
+  defp validate_page_url(page_url) when is_binary(page_url) do
+    validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])
+
+    page_url
+    |> Linkify.Parser.url?(validate_tld: validate_tld)
+    |> parse_uri(page_url)
+  end
+
+  defp validate_page_url(%URI{host: host, scheme: "https"}) do
+    cond do
+      Linkify.Parser.ip?(host) ->
+        :error
+
+      host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
+        :error
+
+      get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) ->
+        :error
+
+      true ->
+        :ok
+    end
+  end
+
+  defp validate_page_url(_), do: :error
+
+  defp parse_uri(true, url) do
+    url
+    |> URI.parse()
+    |> validate_page_url
+  end
+
+  defp parse_uri(_, _), do: :error
+
+  defp get_tld(host) do
+    host
+    |> String.split(".")
+    |> Enum.reverse()
+    |> hd
+  end
 end