logo

pleroma

My custom branch(es) on git.pleroma.social/pleroma/pleroma — git clone https://anongit.hacktivis.me/git/pleroma.git/
commit: b1acc9281a69602b71ba35166e787efd000efa50
parent 93c144e397d408d7ff1761640e12fb51e333b2ce
Author: Mark Felder <feld@feld.me>
Date:   Thu, 31 Jul 2025 18:02:33 -0700

Use ranking to improve order of results

Diffstat:

M lib/pleroma/hashtag.ex | 36 +++++++++++++++++++++++++++++++++---
M test/pleroma/hashtag_test.exs | 36 ++++++++++++++++++++++++++++++++----
2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex @@ -149,9 +149,39 @@ defmodule Pleroma.Hashtag do # This is much more efficient than multiple OR clauses search_patterns = Enum.map(search_terms, &"%#{&1}%") - from(ht in Hashtag, - where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns), - order_by: [asc: ht.name], + # Create ranking query that prioritizes exact matches and closer matches + # Use a subquery to properly handle computed columns in ORDER BY + base_query = + from(ht in Hashtag, + where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns), + select: %{ + name: ht.name, + # Ranking: exact matches get highest priority (0), then prefix matches (1), then contains (2) + match_rank: + fragment( + """ + CASE + WHEN LOWER(?) = ANY(?) THEN 0 + WHEN LOWER(?) LIKE ANY(?) THEN 1 + ELSE 2 + END + """, + ht.name, + ^search_terms, + ht.name, + ^Enum.map(search_terms, &"#{&1}%") + ), + # Secondary sort by name length (shorter names first) + name_length: fragment("LENGTH(?)", ht.name) + } + ) + + from(result in subquery(base_query), + order_by: [ + asc: result.match_rank, + asc: result.name_length, + asc: result.name + ], limit: ^limit, offset: ^offset ) diff --git a/test/pleroma/hashtag_test.exs b/test/pleroma/hashtag_test.exs @@ -39,7 +39,6 @@ defmodule Pleroma.HashtagTest do end test "searches hashtags by multiple words in query" do - # Create some hashtags {:ok, _} = Hashtag.get_or_create_by_name("computer") {:ok, _} = Hashtag.get_or_create_by_name("laptop") {:ok, _} = Hashtag.get_or_create_by_name("desktop") @@ -80,19 +79,48 @@ defmodule Pleroma.HashtagTest do assert length(results) == 2 end - test "handles many search terms efficiently" do - # Create hashtags + test "handles matching many search terms" do {:ok, _} = Hashtag.get_or_create_by_name("computer") {:ok, _} = Hashtag.get_or_create_by_name("laptop") {:ok, _} = Hashtag.get_or_create_by_name("phone") {:ok, _} = Hashtag.get_or_create_by_name("tablet") - # Search with many terms - 
should be efficient with PostgreSQL ANY operator results = Hashtag.search("new fast computer laptop phone tablet device") assert "computer" in results assert "laptop" in results assert "phone" in results assert "tablet" in results end + + test "ranks results by match quality" do + {:ok, _} = Hashtag.get_or_create_by_name("my_computer") + {:ok, _} = Hashtag.get_or_create_by_name("computer_science") + {:ok, _} = Hashtag.get_or_create_by_name("computer") + + results = Hashtag.search("computer") + + # Exact match first + assert Enum.at(results, 0) == "computer" + + # Prefix match would be next + assert Enum.at(results, 1) == "computer_science" + + # worst match is last + assert Enum.at(results, 2) == "my_computer" + end + + test "prioritizes shorter names when ranking is equal" do + # Create hashtags with same ranking but different lengths + {:ok, _} = Hashtag.get_or_create_by_name("car") + {:ok, _} = Hashtag.get_or_create_by_name("racecar") + {:ok, _} = Hashtag.get_or_create_by_name("nascar") + + # Search for "car" - shorter names should come first + results = Hashtag.search("car") + # Shortest exact match first + assert Enum.at(results, 0) == "car" + assert "racecar" in results + assert "nascar" in results + end end end