logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma git clone https://hacktivis.me/git/pleroma.git
commit: 8d0703460851155296255561b23ab22696446803
parent 7910b235c7e7e7ae6904f01eee531e10d3005d72
Author: feld <feld@feld.me>
Date:   Thu, 29 Aug 2024 14:54:01 +0000

Merge branch 'pleroma-http-stream' into 'develop'

Pleroma.HTTP: support streaming response bodies

See merge request pleroma/pleroma!4239

Diffstat:

Achangelog.d/rich-media-no-heads.change1+
Mlib/pleroma/http/adapter_helper.ex10++++++++++
Alib/pleroma/http/adapter_helper/finch.ex33+++++++++++++++++++++++++++++++++
Mlib/pleroma/http/adapter_helper/gun.ex9+++++++++
Mlib/pleroma/web/rich_media/helpers.ex71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
5 files changed, 114 insertions(+), 10 deletions(-)

diff --git a/changelog.d/rich-media-no-heads.change b/changelog.d/rich-media-no-heads.change @@ -0,0 +1 @@ +Rich Media preview fetching will skip making an HTTP HEAD request to check a URL for allowed content type and length if the Tesla adapter is Gun or Finch diff --git a/lib/pleroma/http/adapter_helper.ex b/lib/pleroma/http/adapter_helper.ex @@ -52,6 +52,7 @@ defmodule Pleroma.HTTP.AdapterHelper do case adapter() do Tesla.Adapter.Gun -> AdapterHelper.Gun Tesla.Adapter.Hackney -> AdapterHelper.Hackney + {Tesla.Adapter.Finch, _} -> AdapterHelper.Finch _ -> AdapterHelper.Default end end @@ -118,4 +119,13 @@ defmodule Pleroma.HTTP.AdapterHelper do host_charlist end end + + @spec can_stream? :: bool() + def can_stream? do + case Application.get_env(:tesla, :adapter) do + Tesla.Adapter.Gun -> true + {Tesla.Adapter.Finch, _} -> true + _ -> false + end + end end diff --git a/lib/pleroma/http/adapter_helper/finch.ex b/lib/pleroma/http/adapter_helper/finch.ex @@ -0,0 +1,33 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.HTTP.AdapterHelper.Finch do + @behaviour Pleroma.HTTP.AdapterHelper + + alias Pleroma.Config + alias Pleroma.HTTP.AdapterHelper + + @spec options(keyword(), URI.t()) :: keyword() + def options(incoming_opts \\ [], %URI{} = _uri) do + proxy = + [:http, :proxy_url] + |> Config.get() + |> AdapterHelper.format_proxy() + + config_opts = Config.get([:http, :adapter], []) + + config_opts + |> Keyword.merge(incoming_opts) + |> AdapterHelper.maybe_add_proxy(proxy) + |> maybe_stream() + end + + # Finch uses [response: :stream] + defp maybe_stream(opts) do + case Keyword.pop(opts, :stream, nil) do + {true, opts} -> Keyword.put(opts, :response, :stream) + {_, opts} -> opts + end + end +end diff --git a/lib/pleroma/http/adapter_helper/gun.ex b/lib/pleroma/http/adapter_helper/gun.ex @@ -32,6 +32,7 @@ defmodule Pleroma.HTTP.AdapterHelper.Gun do |> AdapterHelper.maybe_add_proxy(proxy) |> Keyword.merge(incoming_opts) |> put_timeout() + |> maybe_stream() end defp add_scheme_opts(opts, %{scheme: "http"}), do: opts @@ -47,6 +48,14 @@ defmodule Pleroma.HTTP.AdapterHelper.Gun do Keyword.put(opts, :timeout, recv_timeout) end + # Gun uses [body_as: :stream] + defp maybe_stream(opts) do + case Keyword.pop(opts, :stream, nil) do + {true, opts} -> Keyword.put(opts, :body_as, :stream) + {_, opts} -> opts + end + end + @spec pool_timeout(pool()) :: non_neg_integer() def pool_timeout(pool) do default = Config.get([:pools, :default, :recv_timeout], 5_000) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex @@ -11,16 +11,39 @@ defmodule Pleroma.Web.RichMedia.Helpers do @spec rich_media_get(String.t()) :: {:ok, String.t()} | get_errors() def rich_media_get(url) do - headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}] + case Pleroma.HTTP.AdapterHelper.can_stream?() do + true -> stream(url) + false -> head_first(url) + end + |> handle_result(url) + end + + defp stream(url) do + with {_, {:ok, %Tesla.Env{status: 200, body: stream_body, headers: headers}}} <- + {:get, Pleroma.HTTP.get(url, req_headers(), http_options())}, + {_, :ok} <- {:content_type, check_content_type(headers)}, + {_, :ok} <- {:content_length, check_content_length(headers)}, + {:read_stream, {:ok, body}} <- {:read_stream, read_stream(stream_body)} do + {:ok, body} + end + end + defp head_first(url) do with {_, {:ok, %Tesla.Env{status: 200, headers: headers}}} <- - {:head, Pleroma.HTTP.head(url, headers, http_options())}, + {:head, Pleroma.HTTP.head(url, req_headers(), http_options())}, {_, :ok} <- {:content_type, check_content_type(headers)}, {_, :ok} <- {:content_length, check_content_length(headers)}, {_, {:ok, %Tesla.Env{status: 200, body: body}}} <- - {:get, Pleroma.HTTP.get(url, headers, http_options())} do + {:get, Pleroma.HTTP.get(url, req_headers(), http_options())} do {:ok, body} - else + end + end + + defp handle_result(result, url) do + case result do + {:ok, body} -> + {:ok, body} + {:head, _} -> Logger.debug("Rich media error for #{url}: HTTP HEAD failed") {:error, :head} @@ -29,8 +52,12 @@ defmodule Pleroma.Web.RichMedia.Helpers do Logger.debug("Rich media error for #{url}: content-type is #{type}") {:error, :content_type} - {:content_length, {_, length}} -> - Logger.debug("Rich media error for #{url}: content-length is #{length}") + {:content_length, :error} -> + Logger.debug("Rich media error for #{url}: content-length exceeded") + {:error, :body_too_large} + + {:read_stream, :error} -> + Logger.debug("Rich media error for #{url}: content-length exceeded") {:error, :body_too_large} {:get, _} -> @@ -59,7 +86,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do {_, maybe_content_length} -> case Integer.parse(maybe_content_length) do {content_length, ""} when content_length <= max_body -> :ok - {_, ""} -> {:error, maybe_content_length} + {_, ""} -> :error _ -> :ok end @@ -68,13 +95,37 @@ defmodule Pleroma.Web.RichMedia.Helpers do end end - defp http_options do - timeout = Config.get!([:rich_media, :timeout]) + defp read_stream(stream) do + max_body = Keyword.get(http_options(), :max_body) + + try do + result = + Stream.transform(stream, 0, fn chunk, total_bytes -> + new_total = total_bytes + byte_size(chunk) + + if new_total > max_body do + raise("Exceeds max body limit of #{max_body}") + else + {[chunk], new_total} + end + end) + |> Enum.into(<<>>) + {:ok, result} + rescue + _ -> :error + end + end + + defp http_options do [ pool: :rich_media, max_body: Config.get([:rich_media, :max_body], 5_000_000), - tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}] + stream: true ] end + + defp req_headers do + [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}] + end end