logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma — git clone https://hacktivis.me/git/pleroma.git
commit: 5a39866388c411f2bcee9848352f8c420513f34f
parent 6256822afd368e5f6b410d47c5ff9b584e50a461
Author: Ekaterina Vaartis <vaartis@kotobank.ch>
Date:   Sat, 27 Aug 2022 01:43:59 +0300

Specifically strip mentions for search indexing

Diffstat:

M lib/mix/tasks/pleroma/search/meilisearch.ex | 1 +
M lib/pleroma/search/meilisearch.ex | 3 ++-
A priv/scrubbers/search_indexing.ex | 24 ++++++++++++++++++++++++
3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
 
   def run(["index"]) do
     start_pleroma()
+    Pleroma.HTML.compile_scrubbers()
 
     meili_version =
       (
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
@@ -122,7 +122,8 @@ defmodule Pleroma.Search.Meilisearch do
       end
 
     content =
-      with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
+      with {:ok, scrubbed} <-
+             FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
            trimmed <- String.trim(scrubbed) do
         trimmed
       end
diff --git a/priv/scrubbers/search_indexing.ex b/priv/scrubbers/search_indexing.ex
@@ -0,0 +1,24 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.HTML.Scrubber.SearchIndexing do
+  @moduledoc """
+  An HTML scrubbing policy that scrubs things for searching.
+  """
+
+  require FastSanitize.Sanitizer.Meta
+  alias FastSanitize.Sanitizer.Meta
+
+  # Explicitly remove mentions
+  def scrub({:a, attrs, children}) do
+    if(Enum.any?(attrs, fn {att, val} -> att == "class" and String.contains?(val, "mention") end),
+      do: nil,
+      # Strip the tag itself, leave only children (text, presumably)
+      else: children
+    )
+  end
+
+  Meta.strip_comments()
+  Meta.strip_everything_not_covered()
+end