commit: 94e4f215896dc7976a54fd146daf3e286602925a
parent f726e5fbbdab89dd2a8cb498112ed6866047d3d1
Author: Lain Soykaf <lain@lain.com>
Date: Thu, 23 May 2024 14:38:30 +0400
QdrantSearch: Deal with actor restrictions
Diffstat:
2 files changed, 114 insertions(+), 3 deletions(-)
diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex
@@ -43,23 +43,41 @@ defmodule Pleroma.Search.QdrantSearch do
end
end
+ defp actor_from_activity(%{data: %{"actor" => actor}}) do
+ actor
+ end
+
+ defp actor_from_activity(_), do: nil
+
defp build_index_payload(activity, embedding) do
+ actor = actor_from_activity(activity)
+ published_at = activity.data["published"]
+
%{
points: [
%{
id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
- vector: embedding
+ vector: embedding,
+ payload: %{actor: actor, published_at: published_at}
}
]
}
end
defp build_search_payload(embedding, options) do
- %{
+ base = %{
vector: embedding,
limit: options[:limit] || 20,
offset: options[:offset] || 0
}
+
+ if options[:actor] do
+ Map.put(base, :filter, %{
+ must: [%{key: "actor", match: %{value: options[:actor].ap_id}}]
+ })
+ else
+ base
+ end
end
@impl true
diff --git a/test/pleroma/search/qdrant_search_test.exs b/test/pleroma/search/qdrant_search_test.exs
@@ -15,6 +15,94 @@ defmodule Pleroma.Search.QdrantSearchTest do
alias Pleroma.Workers.SearchIndexingWorker
describe "Qdrant search" do
+ test "searches for a term by encoding it and sending it to qdrant" do
+ user = insert(:user)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ Config
+ |> expect(:get, 3, fn
+ [Pleroma.Search, :module], nil ->
+ QdrantSearch
+
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{
+ openai_model: "a_model",
+ openai_url: "https://openai.url",
+ qdrant_url: "https://qdrant.url"
+ }[key]
+ end)
+
+ Tesla.Mock.mock(fn
+ %{url: "https://openai.url/v1/embeddings", method: :post} ->
+ Tesla.Mock.json(%{
+ data: [%{embedding: [1, 2, 3]}]
+ })
+
+ %{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} ->
+ data = Jason.decode!(body)
+ refute data["filter"]
+
+ Tesla.Mock.json(%{
+ result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}]
+ })
+ end)
+
+ results = QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{})
+
+ assert results == [activity]
+ end
+
+ test "for a given actor, ask for only relevant matches" do
+ user = insert(:user)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ Config
+ |> expect(:get, 3, fn
+ [Pleroma.Search, :module], nil ->
+ QdrantSearch
+
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{
+ openai_model: "a_model",
+ openai_url: "https://openai.url",
+ qdrant_url: "https://qdrant.url"
+ }[key]
+ end)
+
+ Tesla.Mock.mock(fn
+ %{url: "https://openai.url/v1/embeddings", method: :post} ->
+ Tesla.Mock.json(%{
+ data: [%{embedding: [1, 2, 3]}]
+ })
+
+ %{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} ->
+ data = Jason.decode!(body)
+
+ assert data["filter"] == %{
+ "must" => [%{"key" => "actor", "match" => %{"value" => user.ap_id}}]
+ }
+
+ Tesla.Mock.json(%{
+ result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}]
+ })
+ end)
+
+ results =
+ QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{actor: user})
+
+ assert results == [activity]
+ end
+
test "indexes a public post on creation, deletes from the index on deletion" do
user = insert(:user)
@@ -29,7 +117,12 @@ defmodule Pleroma.Search.QdrantSearchTest do
%{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} ->
send(self(), "posted_to_qdrant")
- assert match?(%{"points" => [%{"vector" => [1, 2, 3]}]}, Jason.decode!(body))
+ data = Jason.decode!(body)
+ %{"points" => [%{"vector" => vector, "payload" => payload}]} = data
+
+ assert vector == [1, 2, 3]
+ assert payload["actor"]
+ assert payload["published_at"]
Tesla.Mock.json("ok")