logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma
commit: f3045a179e40bd8f670be588a8e93b8d05e95d27
parent: 42612b1c8d356843b9e8785d3a91072f38fb50cf
Author: lambda <pleromagit@rogerbraun.net>
Date:   Sun, 20 Jan 2019 10:24:05 +0000

Merge branch 'i1t/pleroma-477_user_search_improvements' into 'develop'

I1t/pleroma 477 user search improvements

See merge request pleroma/pleroma!685

Diffstat:

Mlib/pleroma/user.ex143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mlib/pleroma/web/mastodon_api/mastodon_api_controller.ex6+++---
Mlib/pleroma/web/twitter_api/twitter_api_controller.ex2+-
Apriv/repo/migrations/20190115085500_create_user_fts_index.exs17+++++++++++++++++
Apriv/repo/migrations/20190118074940_fix_user_trigram_index.exs22++++++++++++++++++++++
Mtest/user_test.exs71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mtest/web/twitter_api/twitter_api_controller_test.exs10+++++-----
7 files changed, 230 insertions(+), 41 deletions(-)

diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex @@ -35,7 +35,7 @@ defmodule Pleroma.User do field(:avatar, :map) field(:local, :boolean, default: true) field(:follower_address, :string) - field(:search_distance, :float, virtual: true) + field(:search_rank, :float, virtual: true) field(:tags, {:array, :string}, default: []) field(:last_refreshed_at, :naive_datetime) has_many(:notifications, Notification) @@ -510,6 +510,12 @@ defmodule Pleroma.User do {:ok, Repo.all(q)} end + def get_followers_ids(user, page \\ nil) do + q = get_followers_query(user, page) + + Repo.all(from(u in q, select: u.id)) + end + def get_friends_query(%User{id: id, following: following}, nil) do from( u in User, @@ -534,6 +540,12 @@ defmodule Pleroma.User do {:ok, Repo.all(q)} end + def get_friends_ids(user, page \\ nil) do + q = get_friends_query(user, page) + + Repo.all(from(u in q, select: u.id)) + end + def get_follow_requests_query(%User{} = user) do from( a in Activity, @@ -665,37 +677,120 @@ defmodule Pleroma.User do Repo.all(query) end - def search(query, resolve \\ false) do - # strip the beginning @ off if there is a query + def search(query, resolve \\ false, for_user \\ nil) do + # Strip the beginning @ off if there is a query query = String.trim_leading(query, "@") - if resolve do - User.get_or_fetch_by_nickname(query) - end + if resolve, do: User.get_or_fetch_by_nickname(query) - inner = - from( - u in User, - select_merge: %{ - search_distance: - fragment( - "? <-> (? || coalesce(?, ''))", - ^query, - u.nickname, - u.name - ) - }, - where: not is_nil(u.nickname) - ) + fts_results = do_search(fts_search_subquery(query), for_user) + + {:ok, trigram_results} = + Repo.transaction(fn -> + Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) + do_search(trigram_search_subquery(query), for_user) + end) + + Enum.uniq_by(fts_results ++ trigram_results, & &1.id) + end + defp do_search(subquery, for_user, options \\ []) do q = from( - s in subquery(inner), - order_by: s.search_distance, - limit: 20 + s in subquery(subquery), + order_by: [desc: s.search_rank], + limit: ^(options[:limit] || 20) ) - Repo.all(q) + results = + q + |> Repo.all() + |> Enum.filter(&(&1.search_rank > 0)) + + boost_search_results(results, for_user) + end + + defp fts_search_subquery(query) do + processed_query = + query + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> Enum.join(" | ") + + from( + u in User, + select_merge: %{ + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 + ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: + fragment( + """ + (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) + """, + u.nickname, + u.name, + ^processed_query + ) + ) + end + + defp trigram_search_subquery(query) do + from( + u in User, + select_merge: %{ + search_rank: + fragment( + "similarity(?, trim(? || ' ' || coalesce(?, '')))", + ^query, + u.nickname, + u.name + ) + }, + where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^query) + ) + end + + defp boost_search_results(results, nil), do: results + + defp boost_search_results(results, for_user) do + friends_ids = get_friends_ids(for_user) + followers_ids = get_followers_ids(for_user) + + Enum.map( + results, + fn u -> + search_rank_coef = + cond do + u.id in friends_ids -> + 1.2 + + u.id in followers_ids -> + 1.1 + + true -> + 1 + end + + Map.put(u, :search_rank, u.search_rank * search_rank_coef) + end + ) + |> Enum.sort_by(&(-&1.search_rank)) end def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex @@ -771,7 +771,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do - accounts = User.search(query, params["resolve"] == "true") + accounts = User.search(query, params["resolve"] == "true", user) statuses = status_search(user, query) @@ -795,7 +795,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do - accounts = User.search(query, params["resolve"] == "true") + accounts = User.search(query, params["resolve"] == "true", user) statuses = status_search(user, query) @@ -816,7 +816,7 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end def account_search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do - accounts = User.search(query, params["resolve"] == "true") + accounts = User.search(query, params["resolve"] == "true", user) res = AccountView.render("accounts.json", users: accounts, for: user, as: :user) diff --git a/lib/pleroma/web/twitter_api/twitter_api_controller.ex b/lib/pleroma/web/twitter_api/twitter_api_controller.ex @@ -675,7 +675,7 @@ defmodule Pleroma.Web.TwitterAPI.Controller do end def search_user(%{assigns: %{user: user}} = conn, %{"query" => query}) do - users = User.search(query, true) + users = User.search(query, true, user) conn |> put_view(UserView) diff --git a/priv/repo/migrations/20190115085500_create_user_fts_index.exs b/priv/repo/migrations/20190115085500_create_user_fts_index.exs @@ -0,0 +1,17 @@ +defmodule Pleroma.Repo.Migrations.CreateUserFtsIndex do + use Ecto.Migration + + def change do + create index( + :users, + [ + """ + (setweight(to_tsvector('simple', regexp_replace(nickname, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(name, ''), '\\W', ' ', 'g')), 'B')) + """ + ], + name: :users_fts_index, + using: :gin + ) + end +end diff --git a/priv/repo/migrations/20190118074940_fix_user_trigram_index.exs b/priv/repo/migrations/20190118074940_fix_user_trigram_index.exs @@ -0,0 +1,22 @@ +defmodule Pleroma.Repo.Migrations.FixUserTrigramIndex do + use Ecto.Migration + + def up do + drop_if_exists(index(:users, [], name: :users_trigram_index)) + + create( + index(:users, ["(trim(nickname || ' ' || coalesce(name, ''))) gist_trgm_ops"], + name: :users_trigram_index, + using: :gist + ) + ) + end + + def down do + drop_if_exists(index(:users, [], name: :users_trigram_index)) + + create( + index(:users, ["(nickname || name) gist_trgm_ops"], name: :users_trigram_index, using: :gist) + ) + end +end diff --git a/test/user_test.exs b/test/user_test.exs @@ -775,14 +775,61 @@ defmodule Pleroma.UserTest do end describe "User.search" do - test "finds a user, ranking by similarity" do - _user = insert(:user, %{name: "lain"}) - _user_two = insert(:user, %{name: "ean"}) - _user_three = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"}) - user_four = insert(:user, %{nickname: "lain@pleroma.soykaf.com"}) + test "finds a user by full or partial nickname" do + user = insert(:user, %{nickname: "john"}) - assert user_four == - User.search("lain@ple") |> List.first() |> Map.put(:search_distance, nil) + Enum.each(["john", "jo", "j"], fn query -> + assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil) + end) + end + + test "finds a user by full or partial name" do + user = insert(:user, %{name: "John Doe"}) + + Enum.each(["John Doe", "JOHN", "doe", "j d", "j", "d"], fn query -> + assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil) + end) + end + + test "finds users, preferring nickname matches over name matches" do + u1 = insert(:user, %{name: "lain", nickname: "nick1"}) + u2 = insert(:user, %{nickname: "lain", name: "nick1"}) + + assert [u2.id, u1.id] == Enum.map(User.search("lain"), & &1.id) + end + + test "finds users, considering density of matched tokens" do + u1 = insert(:user, %{name: "Bar Bar plus Word Word"}) + u2 = insert(:user, %{name: "Word Word Bar Bar Bar"}) + + assert [u2.id, u1.id] == Enum.map(User.search("bar word"), & &1.id) + end + + test "finds users, ranking by similarity" do + u1 = insert(:user, %{name: "lain"}) + _u2 = insert(:user, %{name: "ean"}) + u3 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"}) + u4 = insert(:user, %{nickname: "lain@pleroma.soykaf.com"}) + + assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id) + end + + test "finds users, handling misspelled requests" do + u1 = insert(:user, %{name: "lain"}) + + assert [u1.id] == Enum.map(User.search("laiin"), & &1.id) + end + + test "finds users, boosting ranks of friends and followers" do + u1 = insert(:user) + u2 = insert(:user, %{name: "Doe"}) + follower = insert(:user, %{name: "Doe"}) + friend = insert(:user, %{name: "Doe"}) + + {:ok, follower} = User.follow(follower, u1) + {:ok, u1} = User.follow(u1, friend) + + assert [friend.id, follower.id, u2.id] == Enum.map(User.search("doe", false, u1), & &1.id) end test "finds a user whose name is nil" do @@ -792,7 +839,15 @@ defmodule Pleroma.UserTest do assert user_two == User.search("lain@pleroma.soykaf.com") |> List.first() - |> Map.put(:search_distance, nil) + |> Map.put(:search_rank, nil) + end + + test "does not yield false-positive matches" do + insert(:user, %{name: "John Doe"}) + + Enum.each(["mary", "a", ""], fn query -> + assert [] == User.search(query) + end) end end diff --git a/test/web/twitter_api/twitter_api_controller_test.exs b/test/web/twitter_api/twitter_api_controller_test.exs @@ -1655,16 +1655,16 @@ defmodule Pleroma.Web.TwitterAPI.ControllerTest do describe "GET /api/pleroma/search_user" do test "it returns users, ordered by similarity", %{conn: conn} do user = insert(:user, %{name: "eal"}) - user_two = insert(:user, %{name: "ean"}) - user_three = insert(:user, %{name: "ebn"}) + user_two = insert(:user, %{name: "eal me"}) + _user_three = insert(:user, %{name: "zzz"}) resp = conn - |> get(twitter_api_search__path(conn, :search_user), query: "eal") + |> get(twitter_api_search__path(conn, :search_user), query: "eal me") |> json_response(200) - assert length(resp) == 3 - assert [user.id, user_two.id, user_three.id] == Enum.map(resp, fn %{"id" => id} -> id end) + assert length(resp) == 2 + assert [user_two.id, user.id] == Enum.map(resp, fn %{"id" => id} -> id end) end end