commit: 5a39866388c411f2bcee9848352f8c420513f34f
parent 6256822afd368e5f6b410d47c5ff9b584e50a461
Author: Ekaterina Vaartis <vaartis@kotobank.ch>
Date: Sat, 27 Aug 2022 01:43:59 +0300
Specifically strip mentions for search indexing
Diffstat:
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
def run(["index"]) do
start_pleroma()
+ Pleroma.HTML.compile_scrubbers()
meili_version =
(
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
@@ -122,7 +122,8 @@ defmodule Pleroma.Search.Meilisearch do
end
content =
- with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
+ with {:ok, scrubbed} <-
+ FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
trimmed <- String.trim(scrubbed) do
trimmed
end
diff --git a/priv/scrubbers/search_indexing.ex b/priv/scrubbers/search_indexing.ex
@@ -0,0 +1,24 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.HTML.Scrubber.SearchIndexing do
+  @moduledoc """
+  An HTML scrubbing policy for text that is going to be indexed for search.
+  """
+
+  require FastSanitize.Sanitizer.Meta
+  alias FastSanitize.Sanitizer.Meta
+
+  # Drop links whose class contains "mention"; any other link keeps only its children.
+  def scrub({:a, attrs, children}) do
+    mention? =
+      Enum.any?(attrs, fn {name, value} ->
+        name == "class" and String.contains?(value, "mention")
+      end)
+    if mention?, do: nil, else: children
+  end
+
+  Meta.strip_comments()
+  Meta.strip_everything_not_covered()
+end