commit: b1acc9281a69602b71ba35166e787efd000efa50
parent 93c144e397d408d7ff1761640e12fb51e333b2ce
Author: Mark Felder <feld@feld.me>
Date: Thu, 31 Jul 2025 18:02:33 -0700
Use ranking to improve order of results
Diffstat:
2 files changed, 65 insertions(+), 7 deletions(-)
diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex
@@ -149,9 +149,39 @@ defmodule Pleroma.Hashtag do
# This is much more efficient than multiple OR clauses
search_patterns = Enum.map(search_terms, &"%#{&1}%")
- from(ht in Hashtag,
- where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
- order_by: [asc: ht.name],
+ # Rank every matching hashtag: exact matches first, then prefix matches, then other substring matches
+ # Compute the ranking columns in a subquery so the outer query can reference them in ORDER BY
+ base_query =
+ from(ht in Hashtag,
+ where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
+ select: %{
+ name: ht.name,
+ # Ranking: exact matches get highest priority (0), then prefix matches (1), then contains (2)
+ match_rank:
+ fragment(
+ """
+ CASE
+ WHEN LOWER(?) = ANY(?) THEN 0
+ WHEN LOWER(?) LIKE ANY(?) THEN 1
+ ELSE 2
+ END
+ """,
+ ht.name,
+ ^search_terms,
+ ht.name,
+ ^Enum.map(search_terms, &"#{&1}%")
+ ),
+ # Secondary sort by name length (shorter names first)
+ name_length: fragment("LENGTH(?)", ht.name)
+ }
+ )
+
+ from(result in subquery(base_query),
+ order_by: [
+ asc: result.match_rank,
+ asc: result.name_length,
+ asc: result.name
+ ],
limit: ^limit,
offset: ^offset
)
diff --git a/test/pleroma/hashtag_test.exs b/test/pleroma/hashtag_test.exs
@@ -39,7 +39,6 @@ defmodule Pleroma.HashtagTest do
end
test "searches hashtags by multiple words in query" do
- # Create some hashtags
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("desktop")
@@ -80,19 +79,48 @@ defmodule Pleroma.HashtagTest do
assert length(results) == 2
end
- test "handles many search terms efficiently" do
- # Create hashtags
+ test "handles matching many search terms" do
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("phone")
{:ok, _} = Hashtag.get_or_create_by_name("tablet")
- # Search with many terms - should be efficient with PostgreSQL ANY operator
results = Hashtag.search("new fast computer laptop phone tablet device")
assert "computer" in results
assert "laptop" in results
assert "phone" in results
assert "tablet" in results
end
+
+ test "ranks results by match quality" do
+   # Created in an order that differs from the expected ranking.
+   for name <- ["my_computer", "computer_science", "computer"] do
+     {:ok, _} = Hashtag.get_or_create_by_name(name)
+   end
+
+   top_three =
+     "computer"
+     |> Hashtag.search()
+     |> Enum.take(3)
+
+   # Expected order: the exact match, then the prefix match
+   # ("computer_science"), then the contains-only match ("my_computer").
+   expected = ["computer", "computer_science", "my_computer"]
+   assert top_three == expected
+ end
+
+ test "prioritizes shorter names when ranking is equal" do
+   # "car" is an exact match (rank 0); "nascar" and "racecar" are both
+   # contains-only matches (rank 2), so name length decides their order.
+   {:ok, _} = Hashtag.get_or_create_by_name("car")
+   {:ok, _} = Hashtag.get_or_create_by_name("racecar")
+   {:ok, _} = Hashtag.get_or_create_by_name("nascar")
+
+   results = Hashtag.search("car")
+
+   # The exact match leads; of the two equally ranked names, the shorter
+   # "nascar" (6 characters) must precede "racecar" (7 characters).
+   assert results == ["car", "nascar", "racecar"]
+ end
end
end