logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma
commit: 33e3a7ba7dab750f2a5f85e22b0e44251ec308d2
parent: 8e7da98b18a6f43e6bfd7321904f626cebd357bb
Author: lambda <pleromagit@rogerbraun.net>
Date:   Mon, 31 Dec 2018 09:56:26 +0000

Merge branch 'fix/scrubber-cache' into 'develop'

[#481] Store scrubbed posts in Cachex

Closes #481

See merge request pleroma/pleroma!610

Diffstat:

M lib/pleroma/application.ex | 10 ++++++++++
M lib/pleroma/html.ex | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++--
M lib/pleroma/user.ex | 4 +++-
M lib/pleroma/web/mastodon_api/views/status_view.ex | 2 +-
M lib/pleroma/web/twitter_api/views/activity_view.ex | 6 +++---
M test/user_test.exs | 4 ++--
6 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex @@ -56,6 +56,16 @@ defmodule Pleroma.Application do worker( Cachex, [ + :scrubber_cache, + [ + limit: 2500 + ] + ], + id: :cachex_scrubber + ), + worker( + Cachex, + [ :idempotency_cache, [ expiration: diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex @@ -15,8 +15,11 @@ defmodule Pleroma.HTML do end def filter_tags(html, nil) do - get_scrubbers() - |> Enum.reduce(html, fn scrubber, html -> + filter_tags(html, get_scrubbers()) + end + + def filter_tags(html, scrubbers) when is_list(scrubbers) do + Enum.reduce(scrubbers, html, fn scrubber, html -> filter_tags(html, scrubber) end) end @@ -24,6 +27,40 @@ defmodule Pleroma.HTML do def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber) def filter_tags(html), do: filter_tags(html, nil) def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) + + def get_cached_scrubbed_html_for_object(content, scrubbers, object) do + key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" + Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end) + end + + def get_cached_stripped_html_for_object(content, object) do + get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object) + end + + def ensure_scrubbed_html( + content, + scrubbers + ) do + {:commit, filter_tags(content, scrubbers)} + end + + defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do + generate_scrubber_signature([scrubber]) + end + + defp generate_scrubber_signature(scrubbers) do + Enum.reduce(scrubbers, "", fn scrubber, signature -> + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0) + version = + if Kernel.function_exported?(scrubber, :version, 0) do + scrubber.version + else + 0 + end + + "#{signature}#{to_string(scrubber)}#{version}" + end) + end end defmodule Pleroma.HTML.Scrubber.TwitterText do @@ -39,6 +76,10 @@ defmodule 
Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -77,6 +118,10 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -154,6 +199,10 @@ defmodule Pleroma.HTML.Transform.MediaProxy do alias Pleroma.Web.MediaProxy + def version do + 0 + end + def before_scrub(html), do: html def scrub_attribute("img", {"src", "http" <> target}) do diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex @@ -786,7 +786,9 @@ defmodule Pleroma.User do Pleroma.HTML.Scrubber.TwitterText end - def html_filter_policy(_), do: nil + @default_scrubbers Pleroma.Config.get([:markup, :scrub_policy]) + + def html_filter_policy(_), do: @default_scrubbers def get_or_fetch_by_ap_id(ap_id) do user = get_by_ap_id(ap_id) diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -120,7 +120,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do content = object |> render_content() - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -11,11 +11,11 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do alias Pleroma.Web.TwitterAPI.TwitterAPI alias Pleroma.Web.TwitterAPI.Representers.ObjectRepresenter alias Pleroma.Activity + alias Pleroma.HTML alias Pleroma.Object alias Pleroma.User alias Pleroma.Repo alias Pleroma.Formatter - alias Pleroma.HTML import Ecto.Query require Logger @@ -245,14 +245,14 
@@ defmodule Pleroma.Web.TwitterAPI.ActivityView do html = content - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text = if content do content |> String.replace(~r/<br\s?\/?>/, "\n") - |> HTML.strip_tags() + |> HTML.get_cached_stripped_html_for_object(activity) end reply_parent = Activity.get_in_reply_to_activity(activity) diff --git a/test/user_test.exs b/test/user_test.exs @@ -706,10 +706,10 @@ defmodule Pleroma.UserTest do end describe "per-user rich-text filtering" do - test "html_filter_policy returns nil when rich-text is enabled" do + test "html_filter_policy returns default policies, when rich-text is enabled" do user = insert(:user) - assert nil == User.html_filter_policy(user) + assert Pleroma.Config.get([:markup, :scrub_policy]) == User.html_filter_policy(user) end test "html_filter_policy returns TwitterText scrubber when rich-text is disabled" do