commit: cd7e2138d11901fc7a0c8c2f22b7a5d57383a555
parent c954437cc02f92b5c48c29d2c12d21496d13f813
Author: Lain Soykaf <lain@lain.com>
Date: Tue, 14 May 2024 14:13:37 +0400
Search: Basic Qdrant/Ollama search
Diffstat:
3 files changed, 186 insertions(+), 0 deletions(-)
diff --git a/config/config.exs b/config/config.exs
@@ -915,6 +915,15 @@ config :pleroma, Pleroma.Application,
config :pleroma, Pleroma.Uploaders.Uploader, timeout: 30_000
+config :pleroma, Pleroma.Search.QdrantSearch,
+ qdrant_url: "http://127.0.0.1:6333/",
+ qdrant_api_key: nil,
+ ollama_url: "http://127.0.0.1:11434",
+ ollama_model: "snowflake-arctic-embed:xs",
+ qdrant_index_configuration: %{
+ vectors: %{size: 384, distance: "Cosine"}
+ }
+
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs"
diff --git a/lib/mix/tasks/pleroma/search/indexer.ex b/lib/mix/tasks/pleroma/search/indexer.ex
@@ -0,0 +1,60 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Mix.Tasks.Pleroma.Search.Indexer do
+ import Mix.Pleroma
+ import Ecto.Query
+
+ alias Pleroma.Workers.SearchIndexingWorker
+
+ def run(["index" | options]) do
+ {options, [], []} =
+ OptionParser.parse(
+ options,
+ strict: [
+ limit: :integer
+ ]
+ )
+
+ start_pleroma()
+
+ limit = Keyword.get(options, :limit, 100_000)
+
+ per_step = 1000
+ chunks = max(div(limit, per_step), 1)
+
+ 1..chunks
+ |> Enum.each(fn step ->
+ q =
+ from(a in Pleroma.Activity,
+ limit: ^per_step,
+ offset: ^per_step * (^step - 1),
+ select: [:id],
+ order_by: [desc: :id]
+ )
+
+ {:ok, ids} =
+ Pleroma.Repo.transaction(fn ->
+ Pleroma.Repo.stream(q, timeout: :infinity)
+ |> Enum.map(fn a ->
+ a.id
+ end)
+ end)
+
+ IO.puts("Got #{length(ids)} activities, adding to indexer")
+
+ ids
+ |> Enum.chunk_every(100)
+ |> Enum.each(fn chunk ->
+ IO.puts("Adding #{length(chunk)} activities to indexing queue")
+
+ chunk
+ |> Enum.map(fn id ->
+ SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => id})
+ end)
+ |> Oban.insert_all()
+ end)
+ end)
+ end
+end
diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex
@@ -0,0 +1,117 @@
+defmodule Pleroma.Search.QdrantSearch do
+ @behaviour Pleroma.Search.SearchBackend
+ import Ecto.Query
+ alias Pleroma.Activity
+
+ alias __MODULE__.QdrantClient
+ alias __MODULE__.OllamaClient
+
+ import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
+
+ def initialize_index() do
+ payload = Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
+ QdrantClient.put("/collections/posts", payload)
+ end
+
+ def drop_index() do
+ QdrantClient.delete("/collections/posts")
+ end
+
+ def get_embedding(text) do
+ with {:ok, %{body: %{"embedding" => embedding}}} <-
+ OllamaClient.post("/api/embeddings", %{
+ prompt: text,
+ model: Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_model])
+ })
+ |> IO.inspect() do
+ {:ok, embedding}
+ else
+ _ ->
+ {:error, "Failed to get embedding"}
+ end
+ end
+
+ defp build_index_payload(activity, embedding) do
+ %{
+ points: [
+ %{
+ id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
+ vector: embedding
+ }
+ ]
+ }
+ end
+
+ defp build_search_payload(embedding) do
+ %{
+ vector: embedding,
+ limit: 20
+ }
+ end
+
+ @impl true
+ def add_to_index(activity) do
+ # This will only index public or unlisted notes
+ maybe_search_data = object_to_search_data(activity.object)
+ IO.puts("TRYING TO INDEX\n\n")
+
+ if activity.data["type"] == "Create" and maybe_search_data do
+ with {:ok, embedding} <- get_embedding(maybe_search_data.content),
+ {:ok, %{status: 200}} <-
+ QdrantClient.put(
+ "/collections/posts/points",
+ build_index_payload(activity, embedding)
+ ) do
+ :ok
+ else
+ e -> {:error, e}
+ end
+ else
+ :ok
+ end
+ end
+
+ @impl true
+ def search(_user, query, _options) do
+ with {:ok, embedding} <- get_embedding(query),
+ {:ok, %{body: %{"result" => result}}} <-
+ QdrantClient.post("/collections/posts/points/search", build_search_payload(embedding)) do
+ ids =
+ Enum.map(result, fn %{"id" => id} ->
+ Ecto.UUID.dump!(id)
+ end)
+
+ from(a in Activity, where: a.id in ^ids)
+ |> Activity.with_preloaded_object()
+ |> Activity.restrict_deactivated_users()
+ |> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
+ |> Pleroma.Repo.all()
+ else
+ _ ->
+ []
+ end
+ end
+
+ @impl true
+ def remove_from_index(_object) do
+ :ok
+ end
+end
+
+defmodule Pleroma.Search.QdrantSearch.OllamaClient do
+ use Tesla
+
+ plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_url]))
+ plug(Tesla.Middleware.JSON)
+end
+
+defmodule Pleroma.Search.QdrantSearch.QdrantClient do
+ use Tesla
+
+ plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
+ plug(Tesla.Middleware.JSON)
+
+ plug(Tesla.Middleware.Headers, [
+ {"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
+ ])
+end