logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma git clone https://anongit.hacktivis.me/git/pleroma.git/
commit: f290b159875b68a4ee03ac9f9ced80242ee7085a
parent 6487c93c476aef40c78ad678e0e8352e747cd9a5
Author: Phantasm <phantasm@centrum.cz>
Date:   Mon, 20 Oct 2025 22:10:28 +0200

Move custom URI encoding functions to Pleroma.Utils.URIEncoding

Diffstat:

M lib/pleroma/http.ex | 111 -------------------------------------------------------------------------------
M lib/pleroma/reverse_proxy.ex | 8 +++++---
M lib/pleroma/tesla/middleware/encode_url.ex | 2 +-
M lib/pleroma/upload.ex | 6 +++---
A lib/pleroma/utils/uri_encoding.ex | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/pleroma/web/media_proxy.ex | 6 +++---
M test/pleroma/http_test.exs | 17 ++++++++++-------
M test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs | 15 +++------------
8 files changed, 146 insertions(+), 140 deletions(-)

diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex @@ -14,7 +14,6 @@ defmodule Pleroma.HTTP do alias Tesla.Env require Logger - require Pleroma.Constants @type t :: __MODULE__ @type method() :: :get | :post | :put | :delete | :head @@ -132,114 +131,4 @@ defmodule Pleroma.HTTP do defp default_middleware, do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl] - - # We don't always want to decode the path first, like is the case in - # Pleroma.Upload.url_from_spec/3. - def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do - bypass_parse = Keyword.get(opts, :bypass_parse, false) - bypass_decode = Keyword.get(opts, :bypass_decode, false) - - cond do - bypass_parse -> - encode_path(url, bypass_decode) - - true -> - URI.parse(url) - |> then(fn parsed -> - path = - encode_path(parsed.path, bypass_decode) - |> maybe_apply_path_encoding_quirks() - - query = encode_query(parsed.query) - - %{parsed | path: path, query: query} - end) - |> URI.to_string() - end - end - - defp encode_path(nil, _bypass_decode), do: nil - - # URI.encode/2 deliberately does not encode all chars that are forbidden - # in the path component of a URI. It only encodes chars that are forbidden - # in the whole URI. A predicate in the 2nd argument is used to fix that here. - # URI.encode/2 uses the predicate function to determine whether each byte - # (in an integer representation) should be encoded or not. - defp encode_path(path, bypass_decode) when is_binary(path) do - path = - cond do - bypass_decode -> - path - - true -> - URI.decode(path) - end - - path - |> URI.encode(fn byte -> - URI.char_unreserved?(byte) || - Enum.any?( - Pleroma.Constants.uri_path_allowed_reserved_chars(), - fn char -> - char == byte - end - ) - end) - end - - defp encode_query(nil), do: nil - - # Order of kv pairs in query is not preserved when using URI.decode_query. - # URI.query_decoder/2 returns a stream which so far appears to not change order. 
- # Immediately switch to a list to prevent breakage for sites that expect - # the order of query keys to be always the same. - defp encode_query(query) when is_binary(query) do - query - |> URI.query_decoder() - |> Enum.to_list() - |> do_encode_query() - end - - defp maybe_apply_path_encoding_quirks(path), do: path - - # Always uses www_form encoding - defp do_encode_query(enumerable) do - Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) - end - - defp maybe_apply_query_quirk({key, value}) do - case key do - "precrop" -> - query_encode_kv_pair({key, value}, ~c":,") - - key -> - query_encode_kv_pair({key, value}) - end - end - - defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do - cond do - length(rules) > 0 -> - # URI.encode_query/2 does not appear to follow spec and encodes all part - # of our URI path Constant. This appears to work outside of edge-cases - # like The Guardian Rich Media Cards, keeping behavior same as with - # URI.encode_query/2 unless otherwise specified via rules. 
- (URI.encode_www_form(Kernel.to_string(key)) <> - "=" <> - URI.encode(value, fn byte -> - URI.char_unreserved?(byte) || - Enum.any?( - rules, - fn char -> - char == byte - end - ) - end)) - |> String.replace("%20", "+") - - true -> - URI.encode_www_form(Kernel.to_string(key)) <> - "=" <> URI.encode_www_form(Kernel.to_string(value)) - end - end end diff --git a/lib/pleroma/reverse_proxy.ex b/lib/pleroma/reverse_proxy.ex @@ -3,6 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.ReverseProxy do + alias Pleroma.Utils.URIEncoding + @range_headers ~w(range if-range) @keep_req_headers ~w(accept accept-encoding cache-control if-modified-since) ++ ~w(if-unmodified-since if-none-match) ++ @range_headers @@ -460,9 +462,9 @@ defmodule Pleroma.ReverseProxy do # Also do it for test environment defp maybe_encode_url(url) do case Application.get_env(:tesla, :adapter) do - Tesla.Adapter.Hackney -> Pleroma.HTTP.encode_url(url) - {Tesla.Adapter.Finch, _} -> Pleroma.HTTP.encode_url(url) - Tesla.Mock -> Pleroma.HTTP.encode_url(url) + Tesla.Adapter.Hackney -> URIEncoding.encode_url(url) + {Tesla.Adapter.Finch, _} -> URIEncoding.encode_url(url) + Tesla.Mock -> URIEncoding.encode_url(url) _ -> url end end diff --git a/lib/pleroma/tesla/middleware/encode_url.ex b/lib/pleroma/tesla/middleware/encode_url.ex @@ -17,7 +17,7 @@ defmodule Pleroma.Tesla.Middleware.EncodeUrl do @impl Tesla.Middleware def call(%Tesla.Env{url: url} = env, next, _) do - url = Pleroma.HTTP.encode_url(url) + url = Pleroma.Utils.URIEncoding.encode_url(url) env = %{env | url: url} diff --git a/lib/pleroma/upload.ex b/lib/pleroma/upload.ex @@ -34,8 +34,8 @@ defmodule Pleroma.Upload do """ alias Ecto.UUID - alias Pleroma.HTTP alias Pleroma.Maps + alias Pleroma.Utils.URIEncoding alias Pleroma.Web.ActivityPub.Utils require Logger @@ -234,12 +234,12 @@ defmodule Pleroma.Upload do # Encoding the whole path here is fine since the path is in a # UUID/<file name> form. 
# The file at this point isn't %-encoded, so the path shouldn't - # be decoded first like Pleroma.HTTP.encode_url/1 does. + # be decoded first like Pleroma.Utils.URIEncoding.encode_url/1 does. defp url_from_spec(%__MODULE__{name: name}, base_url, {:file, path}) do encode_opts = [bypass_decode: true, bypass_parse: true] path = - HTTP.encode_url(path, encode_opts) <> + URIEncoding.encode_url(path, encode_opts) <> if Pleroma.Config.get([__MODULE__, :link_name], false) do enum = %{name: name} "?#{URI.encode_query(enum)}" diff --git a/lib/pleroma/utils/uri_encoding.ex b/lib/pleroma/utils/uri_encoding.ex @@ -0,0 +1,121 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2025 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Utils.URIEncoding do + @moduledoc """ + Utility functions for dealing with URI encoding of paths and queries + with support for query-encoding quirks. + """ + require Pleroma.Constants + + # We don't always want to decode the path first, like is the case in + # Pleroma.Upload.url_from_spec/3. + def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do + bypass_parse = Keyword.get(opts, :bypass_parse, false) + bypass_decode = Keyword.get(opts, :bypass_decode, false) + + cond do + bypass_parse -> + encode_path(url, bypass_decode) + + true -> + URI.parse(url) + |> then(fn parsed -> + path = + encode_path(parsed.path, bypass_decode) + |> maybe_apply_path_encoding_quirks() + + query = encode_query(parsed.query) + + %{parsed | path: path, query: query} + end) + |> URI.to_string() + end + end + + defp encode_path(nil, _bypass_decode), do: nil + + # URI.encode/2 deliberately does not encode all chars that are forbidden + # in the path component of a URI. It only encodes chars that are forbidden + # in the whole URI. A predicate in the 2nd argument is used to fix that here. 
+ # URI.encode/2 uses the predicate function to determine whether each byte + # (in an integer representation) should be encoded or not. + defp encode_path(path, bypass_decode) when is_binary(path) do + path = + cond do + bypass_decode -> + path + + true -> + URI.decode(path) + end + + path + |> URI.encode(fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + Pleroma.Constants.uri_path_allowed_reserved_chars(), + fn char -> + char == byte + end + ) + end) + end + + defp encode_query(nil), do: nil + + # Order of kv pairs in query is not preserved when using URI.decode_query. + # URI.query_decoder/2 returns a stream which so far appears to not change order. + # Immediately switch to a list to prevent breakage for sites that expect + # the order of query keys to be always the same. + defp encode_query(query) when is_binary(query) do + query + |> URI.query_decoder() + |> Enum.to_list() + |> do_encode_query() + end + + defp maybe_apply_path_encoding_quirks(path), do: path + + # Always uses www_form encoding + defp do_encode_query(enumerable) do + Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) + end + + defp maybe_apply_query_quirk({key, value}) do + case key do + "precrop" -> + query_encode_kv_pair({key, value}, ~c":,") + + key -> + query_encode_kv_pair({key, value}) + end + end + + defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do + cond do + length(rules) > 0 -> + # URI.encode_query/2 does not appear to follow spec and encodes all parts + # of our URI path Constant. This appears to work outside of edge-cases + # like The Guardian Rich Media Cards, keeping behavior same as with + # URI.encode_query/2 unless otherwise specified via rules. 
+ (URI.encode_www_form(Kernel.to_string(key)) <> + "=" <> + URI.encode(value, fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + rules, + fn char -> + char == byte + end + ) + end)) + |> String.replace("%20", "+") + + true -> + URI.encode_www_form(Kernel.to_string(key)) <> + "=" <> URI.encode_www_form(Kernel.to_string(value)) + end + end +end diff --git a/lib/pleroma/web/media_proxy.ex b/lib/pleroma/web/media_proxy.ex @@ -5,8 +5,8 @@ defmodule Pleroma.Web.MediaProxy do alias Pleroma.Config alias Pleroma.Helpers.UriHelper - alias Pleroma.HTTP alias Pleroma.Upload + alias Pleroma.Utils.URIEncoding alias Pleroma.Web.Endpoint alias Pleroma.Web.MediaProxy.Invalidation @@ -107,14 +107,14 @@ defmodule Pleroma.Web.MediaProxy do # End result is a failing HEAD request in # Pleroma.Web.MediaProxy.MediaProxyController.handle_preview/2 def encode_url(url) do - url = HTTP.encode_url(url) + url = URIEncoding.encode_url(url) {base64, sig64} = base64_sig64(url) build_url(sig64, base64, filename(url)) end def encode_preview_url(url, preview_params \\ []) do - url = HTTP.encode_url(url) + url = URIEncoding.encode_url(url) {base64, sig64} = base64_sig64(url) build_preview_url(sig64, base64, filename(url), preview_params) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs @@ -5,8 +5,11 @@ defmodule Pleroma.HTTPTest do use ExUnit.Case, async: true use Pleroma.Tests.Helpers + import Tesla.Mock + alias Pleroma.HTTP + alias Pleroma.Utils.URIEncoding setup do mock(fn @@ -134,7 +137,7 @@ defmodule Pleroma.HTTPTest do normal_url = "https://example.com/media/file%20with%20space.jpg?name=a+space.jpg" - result = HTTP.encode_url(normal_url) + result = URIEncoding.encode_url(normal_url) assert result == "https://example.com/media/file%20with%20space.jpg?name=a+space.jpg" end @@ -144,7 +147,7 @@ defmodule Pleroma.HTTPTest do normal_url = "https://example.com/media/file%20with%20space.jpg" - result = HTTP.encode_url(normal_url, bypass_decode: true) + result = 
URIEncoding.encode_url(normal_url, bypass_decode: true) assert result == "https://example.com/media/file%2520with%2520space.jpg" end @@ -155,7 +158,7 @@ defmodule Pleroma.HTTPTest do url = "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" - result = HTTP.encode_url(url) + result = URIEncoding.encode_url(url) assert result == url @@ -173,7 +176,7 @@ defmodule Pleroma.HTTPTest do properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" - result = HTTP.encode_url(url) + result = URIEncoding.encode_url(url) assert result == properly_encoded_url @@ -191,8 +194,8 @@ defmodule Pleroma.HTTPTest do unencoded_url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY/20130721/us-east-1/s3/aws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" - result = HTTP.encode_url(url) - result_unencoded = HTTP.encode_url(unencoded_url) + result = URIEncoding.encode_url(url) + result_unencoded = URIEncoding.encode_url(unencoded_url) assert result == url assert result == result_unencoded @@ -207,7 +210,7 @@ defmodule Pleroma.HTTPTest do url = "https://example.com/foo?hjkl=qwertz&xyz=abc&bar=baz" - result = HTTP.encode_url(url) + result = URIEncoding.encode_url(url) assert result == url end diff --git a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs @@ -58,10 +58,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, - get: 
fn _, _, _ -> {:ok, []} end, - encode_url: fn url -> :meck.passthrough([url]) end, - encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do MediaProxyWarmingPolicy.filter(@message) assert called(HTTP.get(:_, :_, :_)) @@ -88,10 +85,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, - get: fn _, _, _ -> {:ok, []} end, - encode_url: fn url -> :meck.passthrough([url]) end, - encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history) assert called(HTTP.get(:_, :_, :_)) @@ -103,10 +97,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, - get: fn _, _, _ -> {:ok, []} end, - encode_url: fn url -> :meck.passthrough([url]) end, - encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history |> Map.put("type", "Update")) assert called(HTTP.get(:_, :_, :_))