commit: ea6a6a128712e81c4f298b2bb2cedfadf2295cff
parent 365024abec905e427babb5403f0fccbde65f4bcd
Author: Ekaterina Vaartis <vaartis@kotobank.ch>
Date: Mon, 16 Aug 2021 22:30:56 +0300
Make the indexing batch differently and more, show number indexed
Diffstat:
1 file changed, 38 insertions(+), 25 deletions(-)
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -28,33 +28,46 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
])
)
- Pleroma.Repo.chunk_stream(
- from(Pleroma.Object,
- # Only index public posts which are notes and have some text
- where:
- fragment("data->>'type' = 'Note'") and
- fragment("LENGTH(data->>'source') > 0") and
- fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public())
- ),
- 200,
- :batches
- )
- |> Stream.map(fn objects ->
- Enum.map(objects, fn object ->
- data = object.data
- %{id: object.id, source: data["source"], ap: data["id"]}
- end)
- end)
- |> Stream.each(fn objects ->
- {:ok, _} =
- Pleroma.HTTP.post(
- "#{endpoint}/indexes/objects/documents",
- Jason.encode!(objects)
+ chunk_size = 100_000
+
+ Pleroma.Repo.transaction(
+ fn ->
+ Pleroma.Repo.stream(
+ from(Pleroma.Object,
+ # Only index public posts which are notes and have some text
+ where:
+ fragment("data->>'type' = 'Note'") and
+ fragment("LENGTH(data->>'source') > 0") and
+ fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
+ order_by: fragment("data->'published' DESC")
+ ),
+ timeout: :infinity
)
+ |> Stream.chunk_every(chunk_size)
+ |> Stream.transform(0, fn objects, acc ->
+ new_acc = acc + Enum.count(objects)
- IO.puts("Indexed #{Enum.count(objects)} entries")
- end)
- |> Stream.run()
+ IO.puts("Indexed #{new_acc} entries")
+
+ {[objects], new_acc}
+ end)
+ |> Stream.map(fn objects ->
+ Enum.map(objects, fn object ->
+ data = object.data
+ %{id: object.id, source: data["source"], ap: data["id"]}
+ end)
+ end)
+ |> Stream.each(fn objects ->
+ {:ok, _} =
+ Pleroma.HTTP.post(
+ "#{endpoint}/indexes/objects/documents",
+ Jason.encode!(objects)
+ )
+ end)
+ |> Stream.run()
+ end,
+ timeout: :infinity
+ )
end
def run(["clear"]) do