logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma
git clone https://hacktivis.me/git/pleroma.git
commit: 35e9192cedcbc56fb07c9933e2988bf900256b53
parent 3dedadf192a3acd0c1dfc2b11eba5a247ae7f61c
Author: Ekaterina Vaartis <vaartis@kotobank.ch>
Date:   Sun, 22 Aug 2021 22:53:18 +0300

Rework task indexing to share code with the main module

The code in the main module now scrubs new posts too

Diffstat:

M lib/mix/tasks/pleroma/search/meilisearch.ex | 35 ++---------------------------------
M lib/pleroma/search/meilisearch.ex | 46 ++++++++++++++++++++++++++++++++--------------
2 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -51,40 +51,9 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
               ),
               timeout: :infinity
             )
+            |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+            |> Stream.filter(fn o -> not is_nil(o) end)
             |> Stream.chunk_every(chunk_size)
-            |> Stream.map(fn objects ->
-              Enum.map(objects, fn object ->
-                data = object.data
-
-                content_str =
-                  case data["content"] do
-                    [nil | rest] -> to_string(rest)
-                    str -> str
-                  end
-
-                {:ok, published, _} = DateTime.from_iso8601(data["published"])
-
-                content =
-                  with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
-                       trimmed <- String.trim(scrubbed) do
-                    trimmed
-                  end
-
-                # Only index if there is anything in the string. If there is a single symbol,
-                # it's probably a dot from mastodon posts with just the picture
-                if String.length(content) > 1 do
-                  %{
-                    id: object.id,
-                    content: content,
-                    ap: data["id"],
-                    published: published |> DateTime.to_unix()
-                  }
-                else
-                  nil
-                end
-              end)
-              |> Enum.filter(fn o -> not is_nil(o) end)
-            end)
             |> Stream.transform(0, fn objects, acc ->
               new_acc = acc + Enum.count(objects)

diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
@@ -39,28 +39,46 @@ defmodule Pleroma.Search.Meilisearch do
     end
   end

-  def add_to_index(activity) do
-    object = activity.object
-
-    if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and
+  def object_to_search_data(object) do
+    if not is_nil(object) and object.data["type"] == "Note" and
          Pleroma.Constants.as_public() in object.data["to"] do
       data = object.data

-      endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+      content_str =
+        case data["content"] do
+          [nil | rest] -> to_string(rest)
+          str -> str
+        end
+
+      content =
+        with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
+             trimmed <- String.trim(scrubbed) do
+          trimmed
+        end
+
+      if String.length(content) > 1 do
+        {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+        %{
+          id: object.id,
+          content: content,
+          ap: data["id"],
+          published: published |> DateTime.to_unix()
+        }
+      end
+    end
+  end

-      {:ok, published, _} = DateTime.from_iso8601(data["published"])
+  def add_to_index(activity) do
+    maybe_search_data = object_to_search_data(activity)
+
+    if activity.data["type"] == "Create" and maybe_search_data do
+      endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

       {:ok, result} =
         Pleroma.HTTP.post(
           "#{endpoint}/indexes/objects/documents",
-          Jason.encode!([
-            %{
-              id: object.id,
-              content: data["content"] |> Pleroma.HTML.filter_tags(),
-              ap: data["id"],
-              published: published |> DateTime.to_unix()
-            }
-          ])
+          Jason.encode!([maybe_search_data])
         )

       if not Map.has_key?(Jason.decode!(result.body), "updateId") do