commit: 35e9192cedcbc56fb07c9933e2988bf900256b53
parent 3dedadf192a3acd0c1dfc2b11eba5a247ae7f61c
Author: Ekaterina Vaartis <vaartis@kotobank.ch>
Date: Sun, 22 Aug 2021 22:53:18 +0300
Rework task indexing to share code with the main module
The code in the main module now scrubs new posts too
Diffstat:
2 files changed, 34 insertions(+), 47 deletions(-)
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -51,40 +51,9 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
),
timeout: :infinity
)
+ |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+ |> Stream.filter(fn o -> not is_nil(o) end)
|> Stream.chunk_every(chunk_size)
- |> Stream.map(fn objects ->
- Enum.map(objects, fn object ->
- data = object.data
-
- content_str =
- case data["content"] do
- [nil | rest] -> to_string(rest)
- str -> str
- end
-
- {:ok, published, _} = DateTime.from_iso8601(data["published"])
-
- content =
- with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
- trimmed <- String.trim(scrubbed) do
- trimmed
- end
-
- # Only index if there is anything in the string. If there is a single symbol,
- # it's probably a dot from mastodon posts with just the picture
- if String.length(content) > 1 do
- %{
- id: object.id,
- content: content,
- ap: data["id"],
- published: published |> DateTime.to_unix()
- }
- else
- nil
- end
- end)
- |> Enum.filter(fn o -> not is_nil(o) end)
- end)
|> Stream.transform(0, fn objects, acc ->
new_acc = acc + Enum.count(objects)
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
@@ -39,28 +39,46 @@ defmodule Pleroma.Search.Meilisearch do
end
end
- def add_to_index(activity) do
- object = activity.object
-
- if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and
+ def object_to_search_data(object) do
+ if not is_nil(object) and object.data["type"] == "Note" and
Pleroma.Constants.as_public() in object.data["to"] do
data = object.data
- endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+ content_str =
+ case data["content"] do
+ [nil | rest] -> to_string(rest)
+ str -> str
+ end
+
+ content =
+ with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
+ trimmed <- String.trim(scrubbed) do
+ trimmed
+ end
+
+ if String.length(content) > 1 do
+ {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+ %{
+ id: object.id,
+ content: content,
+ ap: data["id"],
+ published: published |> DateTime.to_unix()
+ }
+ end
+ end
+ end
- {:ok, published, _} = DateTime.from_iso8601(data["published"])
+ def add_to_index(activity) do
+ maybe_search_data = object_to_search_data(activity)
+
+ if activity.data["type"] == "Create" and maybe_search_data do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
{:ok, result} =
Pleroma.HTTP.post(
"#{endpoint}/indexes/objects/documents",
- Jason.encode!([
- %{
- id: object.id,
- content: data["content"] |> Pleroma.HTML.filter_tags(),
- ap: data["id"],
- published: published |> DateTime.to_unix()
- }
- ])
+ Jason.encode!([maybe_search_data])
)
if not Map.has_key?(Jason.decode!(result.body), "updateId") do