logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma git clone https://anongit.hacktivis.me/git/pleroma.git/
commit: a0f73d0e2f1afca990f6f1f2b39c986acdee8408
parent 1b438fd167368623ad9a0fbec92552e520f77bf5
Author: Phantasm <phantasm@centrum.cz>
Date:   Wed,  8 Oct 2025 23:34:43 +0200

Reimplement URI.encode_query/2 to support quirks, add Guardian quirk

This solves the issue with Guardian rich media cards not loading, which
was caused by their use of "," and ":" in queries that were then
improperly encoded. Guardian also needs a specific ordering of the query
keys; this commit fixes that as well.

Diffstat:

Mlib/pleroma/http.ex49+++++++++++++++++++++++++++++++++++++++++++++++--
Mtest/pleroma/http_test.exs67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex @@ -147,6 +147,7 @@ defmodule Pleroma.HTTP do URI.parse(url) |> then(fn parsed -> path = encode_path(parsed.path, bypass_decode) + |> maybe_apply_path_encoding_quirks() query = encode_query(parsed.query) %{parsed | path: path, query: query} @@ -186,9 +187,53 @@ defmodule Pleroma.HTTP do defp encode_query(nil), do: nil + # Order of kv pairs in query is not preserved when using URI.decode_query. + # URI.query_decoder/2 returns a stream which so far appears to not change order. + # Immediately switch to a list to prevent breakage for sites that expect + # the order of query keys to be always the same. defp encode_query(query) when is_binary(query) do query - |> URI.decode_query() - |> URI.encode_query() + |> URI.query_decoder() + |> Enum.to_list() + |> do_encode_query() + end + + defp maybe_apply_path_encoding_quirks(path), do: path + + # Always uses www_form encoding + defp do_encode_query(enumerable) do + Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) + end + + defp maybe_apply_query_quirk({key, value}) do + case key do + "precrop" -> + query_encode_kv_pair({key, value}, ~c":,") + + key -> + query_encode_kv_pair({key, value}) + end + end + + defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do + cond do + length(rules) > 0 -> + # URI.encode_query/2 does not appear to follow spec and encodes all parts of our URI path Constant. + # This appears to work outside of edge-cases like The Guardian Rich Media Cards, + # keeping behavior same as with URI.encode_query/2 unless otherwise specified via rules. 
+ URI.encode_www_form(Kernel.to_string(key)) <> "=" <> + URI.encode(value, fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + rules, + fn char -> + char == byte + end) + end) + |> String.replace("%20", "+") + + true -> + URI.encode_www_form(Kernel.to_string(key)) <> "=" <> URI.encode_www_form(Kernel.to_string(value)) + end end end diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs @@ -37,6 +37,15 @@ defmodule Pleroma.HTTPTest do %{method: :get, url: "https://example.com/media/unicode%20%F0%9F%99%82%20.gif"} -> %Tesla.Env{status: 200, body: "unicode data"} + + %{method: :get, url: "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8"} -> + %Tesla.Env{status: 200, body: "Guardian image quirk"} + + %{method: :get, url: "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz"} -> + %Tesla.Env{status: 200, body: "Space in query with Guardian quirk"} + + %{method: :get, url: "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host"} -> + %Tesla.Env{status: 200, body: "AWS S3 data"} end) :ok @@ -127,4 +136,62 @@ defmodule Pleroma.HTTPTest do assert result == "https://example.com/media/file%2520with%2520space.jpg" end + + test "properly applies Guardian image query quirk" do + clear_config(:test_url_encoding, true) + + url = 
"https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" + + result = HTTP.encode_url(url) + + assert result == url + + {:ok, result_get} = HTTP.get(result) + + assert result_get.status == 200 + end + + test "properly encodes spaces as \"pluses\" in query when using quirks" do + clear_config(:test_url_encoding, true) + + url = "https://example.com/emoji/Pack 1/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar baz" + + properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" + + result = HTTP.encode_url(url) + + assert result == properly_encoded_url + + {:ok, result_get} = HTTP.get(result) + + assert result_get.status == 200 + end + + test "properly encode AWS S3 queries" do + clear_config(:test_url_encoding, true) + + url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + unencoded_url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY/20130721/us-east-1/s3/aws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + + result = HTTP.encode_url(url) + result_unencoded = HTTP.encode_url(unencoded_url) + + assert result == url + assert result == result_unencoded + + {:ok, result_get} = HTTP.get(result) + + assert result_get.status == 200 + + end + + test "preserves query key order" do + clear_config(:test_url_encoding, true) + + url = 
"https://example.com/foo?hjkl=qwertz&xyz=abc&bar=baz" + + result = HTTP.encode_url(url) + + assert result == url + end end