logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma — git clone https://hacktivis.me/git/pleroma.git

helpers.ex (3061B)


  1. # Pleroma: A lightweight social networking server
  2. # Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
  3. # SPDX-License-Identifier: AGPL-3.0-only
  4. defmodule Pleroma.Web.RichMedia.Helpers do
  5. alias Pleroma.Activity
  6. alias Pleroma.HTML
  7. alias Pleroma.Object
  8. alias Pleroma.Web.RichMedia.Parser
  # Cache backend module. Module attributes are evaluated when this module is
  # compiled, so the provider is fixed at compile time.
  # NOTE(review): `@config_impl` below uses `Application.compile_env/3` for the
  # same purpose; confirm whether this lookup should follow suit.
  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)

  # Module used for configuration lookups; falls back to `Pleroma.Config` when
  # no `:config_impl` is configured for this module.
  @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)

  # Options passed to the HEAD and GET requests issued by `rich_media_get/1`.
  # `:max_body` is also the limit used when checking `content-length`.
  @options [pool: :media, max_body: 2_000_000, recv_timeout: 2_000]
  @doc """
  Builds the rich media data for `object`.

  Returns `%{page_url: url, rich_media: data}` when rich media is enabled,
  `HTML.extract_first_external_url_from_object/1` yields `{:ok, url}` and
  `Parser.parse/1` yields `{:ok, data}` for that URL. Returns `%{}` in every
  other case.
  """
  def fetch_data_for_object(object) do
    # Only a literal `true` enables the lookup, as with a `true <- ...` match.
    if @config_impl.get([:rich_media, :enabled]) == true do
      object
      |> HTML.extract_first_external_url_from_object()
      |> parse_extracted_url()
    else
      %{}
    end
  end

  # Parses the extracted URL; anything but a successful extraction and a
  # successful parse collapses to an empty map.
  defp parse_extracted_url({:ok, url}) do
    case Parser.parse(url) do
      {:ok, parsed} -> %{page_url: url, rich_media: parsed}
      _failure -> %{}
    end
  end

  defp parse_extracted_url(_no_url), do: %{}
  @doc """
  Returns the rich media data for a `"Create"` activity, or `%{}`.

  When rich media is enabled and the activity normalizes to an `Object`
  (without fetching), the data comes from `fetch_data_for_object/1`:

    * objects whose data has a truthy `"fake"` entry are looked up directly;
    * all others go through `:scrubber_cache` under the key
      `"URL|<activity id>"`. Only results carrying both `:page_url` and
      `:rich_media` are committed to the cache; anything else is ignored
      and `%{}` is returned.

  Any other argument, a disabled config, or a failed normalization yields `%{}`.
  """
  def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
    with true <- @config_impl.get([:rich_media, :enabled]),
         %Object{} = object <- Object.normalize(activity, fetch: false) do
      if object.data["fake"],
        do: fetch_data_for_object(object),
        else: cached_data_for_activity(activity, object)
    else
      _ -> %{}
    end
  end

  def fetch_data_for_activity(_), do: %{}

  # Cache-backed lookup keyed on the activity id. The cache key is registered
  # with `Activity.HTML` only when a result is actually committed.
  defp cached_data_for_activity(activity, object) do
    cache_key = "URL|#{activity.id}"

    @cachex.fetch!(:scrubber_cache, cache_key, fn _ ->
      case fetch_data_for_object(object) do
        %{page_url: _, rich_media: _} = data ->
          Activity.HTML.add_cache_key_for(activity.id, cache_key)
          {:commit, data}

        _empty ->
          {:ignore, %{}}
      end
    end)
  end
  @doc """
  Performs a GET on `url`, preceded by a HEAD pre-check.

  The HEAD request decides whether the GET is issued at all:

    * `{:error, _}` from HEAD is returned as-is and no GET is made;
    * a 200 HEAD response has its `content-type` and `content-length`
      headers checked, and a failed check is returned instead of the GET;
    * any other HEAD result lets the GET proceed.

  Both requests send a `user-agent` header made of
  `Pleroma.Application.user_agent/0` followed by `"; Bot"`.
  """
  def rich_media_get(url) do
    request_headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]

    case head_precheck(url, request_headers) do
      :ok -> Pleroma.HTTP.get(url, request_headers, @options)
      rejected -> rejected
    end
  end

  # Issues the HEAD request and maps its outcome to `:ok` or an error tuple.
  defp head_precheck(url, request_headers) do
    case Pleroma.HTTP.head(url, request_headers, @options) do
      # If the HEAD request didn't reach the server for whatever reason,
      # we assume the GET that comes right after won't either
      {:error, _} = error ->
        error

      {:ok, %Tesla.Env{status: 200, headers: response_headers}} ->
        with :ok <- check_content_type(response_headers) do
          check_content_length(response_headers)
        end

      _ ->
        :ok
    end
  end
  # Accepts the response when it has no "content-type" header or when that
  # header parses as text/html; otherwise returns
  # `{:error, {:content_type, value}}` with the raw header value.
  defp check_content_type(headers) do
    headers
    |> List.keyfind("content-type", 0)
    |> validate_content_type()
  end

  defp validate_content_type({_name, value}) do
    if match?({:ok, "text", "html", _}, Plug.Conn.Utils.media_type(value)) do
      :ok
    else
      {:error, {:content_type, value}}
    end
  end

  # No header found: nothing to reject.
  defp validate_content_type(_missing), do: :ok
  # Guards only accept compile-time values, so the limit is copied out of
  # @options into its own attribute.
  @max_body @options[:max_body]

  # Rejects the response only when "content-length" is a clean integer string
  # larger than @max_body. A missing header or a value that does not parse
  # cleanly is let through.
  defp check_content_length(headers) do
    with {_name, raw_value} <- List.keyfind(headers, "content-length", 0),
         {declared_size, ""} when declared_size > @max_body <- Integer.parse(raw_value) do
      {:error, :body_too_large}
    else
      _ -> :ok
    end
  end
  90. end