commit: 62993871e40ba54d83fcfc8685587f2f0e80c7b6
parent 34efff85dad8a1d4963b91550220de5d4e1bb103
Author: feld <feld@feld.me>
Date: Sat, 2 Aug 2025 18:49:57 +0000
Merge branch 'hashtag-search' into 'develop'
Fix Hashtag search
See merge request pleroma/pleroma!4389
Diffstat:
5 files changed, 260 insertions(+), 114 deletions(-)
diff --git a/changelog.d/hashtag-search.change b/changelog.d/hashtag-search.change
@@ -0,0 +1 @@
+Hashtag searches return real results based on words in your query
diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex
@@ -130,4 +130,66 @@ defmodule Pleroma.Hashtag do
end
def get_recipients_for_activity(_activity), do: []
+
+  @doc """
+  Returns the names of hashtags matching any whitespace-separated term in `query`.
+
+  A leading `#` is stripped from terms and matching is case-insensitive. Results
+  are ranked exact > prefix > substring match, then by shorter name, then by name.
+  `options` accepts `:limit` (default 20) and `:offset` (default 0).
+  """
+  def search(query, options \\ []) do
+    limit = Keyword.get(options, :limit, 20)
+    offset = Keyword.get(options, :offset, 0)
+
+    search_terms =
+      query
+      |> String.downcase()
+      |> String.trim()
+      |> String.split(~r/\s+/, trim: true)
+      |> Enum.map(&String.trim_leading(&1, "#"))
+      |> Enum.reject(&(&1 == ""))
+
+    if Enum.empty?(search_terms) do
+      []
+    else
+      # Escape LIKE metacharacters so user-typed `%`, `_` and `\` match literally
+      escaped_terms = Enum.map(search_terms, &String.replace(&1, ~r/([\\%_])/, "\\\\\\1"))
+      contains_patterns = Enum.map(escaped_terms, &"%#{&1}%")
+      prefix_patterns = Enum.map(escaped_terms, &"#{&1}%")
+
+      # Rank in a subquery so the outer query can order by the computed columns
+      base_query =
+        from(ht in Hashtag,
+          where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^contains_patterns),
+          select: %{
+            name: ht.name,
+            # 0 = exact match, 1 = prefix match, 2 = substring match
+            match_rank:
+              fragment(
+                """
+                CASE
+                WHEN LOWER(?) = ANY(?) THEN 0
+                WHEN LOWER(?) LIKE ANY(?) THEN 1
+                ELSE 2
+                END
+                """,
+                ht.name,
+                ^search_terms,
+                ht.name,
+                ^prefix_patterns
+              ),
+            name_length: fragment("LENGTH(?)", ht.name)
+          }
+        )
+
+      from(result in subquery(base_query),
+        order_by: [asc: result.match_rank, asc: result.name_length, asc: result.name],
+        limit: ^limit,
+        offset: ^offset
+      )
+      |> Repo.all()
+      |> Enum.map(& &1.name)
+    end
+  end
end
diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
@@ -5,6 +5,7 @@
defmodule Pleroma.Web.MastodonAPI.SearchController do
use Pleroma.Web, :controller
+ alias Pleroma.Hashtag
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Web.ControllerHelper
@@ -120,69 +121,14 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
defp resource_search(:v2, "hashtags", query, options) do
tags_path = Endpoint.url() <> "/tag/"
- query
- |> prepare_tags(options)
+ Hashtag.search(query, options)
|> Enum.map(fn tag ->
%{name: tag, url: tags_path <> tag}
end)
end
defp resource_search(:v1, "hashtags", query, options) do
- prepare_tags(query, options)
- end
-
- defp prepare_tags(query, options) do
- tags =
- query
- |> preprocess_uri_query()
- |> String.split(~r/[^#\w]+/u, trim: true)
- |> Enum.uniq_by(&String.downcase/1)
-
- explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end)
-
- tags =
- if Enum.any?(explicit_tags) do
- explicit_tags
- else
- tags
- end
-
- tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end)
-
- tags =
- if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do
- add_joined_tag(tags)
- else
- tags
- end
-
- Pleroma.Pagination.paginate_list(tags, options)
- end
-
- defp add_joined_tag(tags) do
- tags
- |> Kernel.++([joined_tag(tags)])
- |> Enum.uniq_by(&String.downcase/1)
- end
-
- # If `query` is a URI, returns last component of its path, otherwise returns `query`
- defp preprocess_uri_query(query) do
- if query =~ ~r/https?:\/\// do
- query
- |> String.trim_trailing("/")
- |> URI.parse()
- |> Map.get(:path)
- |> String.split("/")
- |> Enum.at(-1)
- else
- query
- end
- end
-
- defp joined_tag(tags) do
- tags
- |> Enum.map(fn tag -> String.capitalize(tag) end)
- |> Enum.join()
+ Hashtag.search(query, options)
end
defp with_fallback(f, fallback \\ []) do
diff --git a/test/pleroma/hashtag_test.exs b/test/pleroma/hashtag_test.exs
@@ -14,4 +14,133 @@ defmodule Pleroma.HashtagTest do
assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors
end
end
+
+ describe "search_hashtags" do
+    test "searches hashtags by partial match" do
+      for name <- ~w(car racecar nascar bicycle) do
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end
+
+      # Substring matches are included, unrelated tags are not
+      car_matches = Hashtag.search("car")
+      assert "car" in car_matches
+      assert "racecar" in car_matches
+      assert "nascar" in car_matches
+      refute "bicycle" in car_matches
+
+      # Only tags that actually contain the term are returned
+      race_matches = Hashtag.search("race")
+      assert "racecar" in race_matches
+      refute "car" in race_matches
+      refute "nascar" in race_matches
+      refute "bicycle" in race_matches
+
+      # A term that matches nothing yields an empty list
+      assert Hashtag.search("nonexistent") == []
+    end
+
+    test "searches hashtags by multiple words in query" do
+      Enum.each(~w(computer laptop desktop phone), fn name ->
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end)
+
+      # "new" matches no tag, so only "computer" is returned
+      found = Hashtag.search("new computer")
+      assert "computer" in found
+      refute "laptop" in found
+      refute "desktop" in found
+      refute "phone" in found
+
+      # Both terms match a tag, so both tags are returned
+      found = Hashtag.search("computer laptop")
+      assert "computer" in found
+      assert "laptop" in found
+      refute "desktop" in found
+      refute "phone" in found
+
+      # "new" is ignored again; only "phone" is returned
+      found = Hashtag.search("new phone")
+      assert "phone" in found
+      refute "computer" in found
+      refute "laptop" in found
+      refute "desktop" in found
+    end
+
+    test "supports pagination" do
+      for name <- ~w(alpha beta gamma delta) do
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end
+
+      # All four tags contain "a"; the expected order is the prefix match first
+      # ("alpha"), then substring matches by length and name.
+      assert Hashtag.search("a", limit: 2) == ["alpha", "beta"]
+
+      # An offset of 1 skips "alpha" and yields the next window of two
+      assert Hashtag.search("a", limit: 2, offset: 1) == ["beta", "delta"]
+    end
+
+    test "handles matching many search terms" do
+      tag_names = ~w(computer laptop phone tablet)
+
+      for name <- tag_names do
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end
+
+      # Terms without a matching tag ("new", "fast", "device") are simply ignored
+      results = Hashtag.search("new fast computer laptop phone tablet device")
+
+      for name <- tag_names do
+        assert name in results
+      end
+    end
+
+    test "ranks results by match quality" do
+      for name <- ~w(my_computer computer_science computer) do
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end
+
+      top_three =
+        "computer"
+        |> Hashtag.search()
+        |> Enum.take(3)
+
+      # Exact match, then prefix match, then the substring-only match
+      assert top_three == ["computer", "computer_science", "my_computer"]
+    end
+
+    test "prioritizes shorter names when ranking is equal" do
+      for name <- ~w(car racecar nascar) do
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end
+
+      # "car" is the exact match; "nascar" and "racecar" are both substring
+      # matches, so the shorter "nascar" must be listed before "racecar".
+      assert Hashtag.search("car") == ["car", "nascar", "racecar"]
+    end
+
+    test "handles hashtag symbols in search query" do
+      for name <- ~w(computer laptop phone) do
+        {:ok, _} = Hashtag.get_or_create_by_name(name)
+      end
+
+      # A leading "#" on every term makes no difference to the results
+      assert Hashtag.search("#computer #laptop") == Hashtag.search("computer laptop")
+
+      # Prefixed and bare terms can be mixed freely
+      mixed = Hashtag.search("#computer laptop #phone")
+      assert "computer" in mixed
+      assert "laptop" in mixed
+      assert "phone" in mixed
+
+      # The same holds for a single-term query
+      assert Hashtag.search("#computer") == Hashtag.search("computer")
+    end
+ end
end
diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs
@@ -7,7 +7,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
alias Pleroma.Object
alias Pleroma.Web.CommonAPI
- alias Pleroma.Web.Endpoint
import Pleroma.Factory
import ExUnit.CaptureLog
import Tesla.Mock
@@ -66,9 +65,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
[account | _] = results["accounts"]
assert account["id"] == to_string(user_three.id)
- assert results["hashtags"] == [
- %{"name" => "private", "url" => "#{Endpoint.url()}/tag/private"}
- ]
+ assert results["hashtags"] == []
[status] = results["statuses"]
assert status["id"] == to_string(activity.id)
@@ -77,9 +74,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
get(conn, "/api/v2/search?q=天子")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "天子", "url" => "#{Endpoint.url()}/tag/天子"}
- ]
+ assert results["hashtags"] == []
[status] = results["statuses"]
assert status["id"] == to_string(activity.id)
@@ -130,84 +125,97 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
assert [] = results["statuses"]
end
- test "constructs hashtags from search query", %{conn: conn} do
+ test "returns empty results when no hashtags match", %{conn: conn} do
results =
conn
- |> get("/api/v2/search?#{URI.encode_query(%{q: "some text with #explicit #hashtags"})}")
+ |> get("/api/v2/search?#{URI.encode_query(%{q: "nonexistent"})}")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "explicit", "url" => "#{Endpoint.url()}/tag/explicit"},
- %{"name" => "hashtags", "url" => "#{Endpoint.url()}/tag/hashtags"}
- ]
+ assert results["hashtags"] == []
+ end
+
+ test "searches hashtags by multiple words in query", %{conn: conn} do
+ user = insert(:user)
+
+ {:ok, _activity1} = CommonAPI.post(user, %{status: "This is my new #computer"})
+ {:ok, _activity2} = CommonAPI.post(user, %{status: "Check out this #laptop"})
+ {:ok, _activity3} = CommonAPI.post(user, %{status: "My #desktop setup"})
+ {:ok, _activity4} = CommonAPI.post(user, %{status: "New #phone arrived"})
results =
conn
- |> get("/api/v2/search?#{URI.encode_query(%{q: "john doe JOHN DOE"})}")
+ |> get("/api/v2/search?#{URI.encode_query(%{q: "new computer"})}")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "john", "url" => "#{Endpoint.url()}/tag/john"},
- %{"name" => "doe", "url" => "#{Endpoint.url()}/tag/doe"},
- %{"name" => "JohnDoe", "url" => "#{Endpoint.url()}/tag/JohnDoe"}
- ]
+ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
+ assert "computer" in hashtag_names
+ refute "laptop" in hashtag_names
+ refute "desktop" in hashtag_names
+ refute "phone" in hashtag_names
results =
conn
- |> get("/api/v2/search?#{URI.encode_query(%{q: "accident-prone"})}")
+ |> get("/api/v2/search?#{URI.encode_query(%{q: "computer laptop"})}")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "accident", "url" => "#{Endpoint.url()}/tag/accident"},
- %{"name" => "prone", "url" => "#{Endpoint.url()}/tag/prone"},
- %{"name" => "AccidentProne", "url" => "#{Endpoint.url()}/tag/AccidentProne"}
- ]
+ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
+ assert "computer" in hashtag_names
+ assert "laptop" in hashtag_names
+ refute "desktop" in hashtag_names
+ refute "phone" in hashtag_names
+ end
+
+ test "supports pagination of hashtags search results", %{conn: conn} do
+ user = insert(:user)
+
+ {:ok, _activity1} = CommonAPI.post(user, %{status: "First #alpha hashtag"})
+ {:ok, _activity2} = CommonAPI.post(user, %{status: "Second #beta hashtag"})
+ {:ok, _activity3} = CommonAPI.post(user, %{status: "Third #gamma hashtag"})
+ {:ok, _activity4} = CommonAPI.post(user, %{status: "Fourth #delta hashtag"})
results =
conn
- |> get("/api/v2/search?#{URI.encode_query(%{q: "https://shpposter.club/users/shpuld"})}")
+ |> get("/api/v2/search?#{URI.encode_query(%{q: "a", limit: 2, offset: 1})}")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "shpuld", "url" => "#{Endpoint.url()}/tag/shpuld"}
- ]
+ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
+
+ # Should return 2 hashtags (alpha, beta, gamma, delta all contain 'a')
+ # With offset 1, we skip the first one, so we get 2 of the remaining 3
+ assert length(hashtag_names) == 2
+ assert Enum.all?(hashtag_names, &String.contains?(&1, "a"))
+ end
+
+ test "searches real hashtags from database", %{conn: conn} do
+ user = insert(:user)
+
+ {:ok, _activity1} = CommonAPI.post(user, %{status: "Check out this #car"})
+ {:ok, _activity2} = CommonAPI.post(user, %{status: "Fast #racecar on the track"})
+ {:ok, _activity3} = CommonAPI.post(user, %{status: "NASCAR #nascar racing"})
results =
conn
- |> get(
- "/api/v2/search?#{URI.encode_query(%{q: "https://www.washingtonpost.com/sports/2020/06/10/" <> "nascar-ban-display-confederate-flag-all-events-properties/"})}"
- )
+ |> get("/api/v2/search?#{URI.encode_query(%{q: "car"})}")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "nascar", "url" => "#{Endpoint.url()}/tag/nascar"},
- %{"name" => "ban", "url" => "#{Endpoint.url()}/tag/ban"},
- %{"name" => "display", "url" => "#{Endpoint.url()}/tag/display"},
- %{"name" => "confederate", "url" => "#{Endpoint.url()}/tag/confederate"},
- %{"name" => "flag", "url" => "#{Endpoint.url()}/tag/flag"},
- %{"name" => "all", "url" => "#{Endpoint.url()}/tag/all"},
- %{"name" => "events", "url" => "#{Endpoint.url()}/tag/events"},
- %{"name" => "properties", "url" => "#{Endpoint.url()}/tag/properties"},
- %{
- "name" => "NascarBanDisplayConfederateFlagAllEventsProperties",
- "url" =>
- "#{Endpoint.url()}/tag/NascarBanDisplayConfederateFlagAllEventsProperties"
- }
- ]
- end
+ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
- test "supports pagination of hashtags search results", %{conn: conn} do
+ # Should return car, racecar, and nascar since they all contain "car"
+ assert "car" in hashtag_names
+ assert "racecar" in hashtag_names
+ assert "nascar" in hashtag_names
+
+ # Search for "race" - should return racecar
results =
conn
- |> get(
- "/api/v2/search?#{URI.encode_query(%{q: "#some #text #with #hashtags", limit: 2, offset: 1})}"
- )
+ |> get("/api/v2/search?#{URI.encode_query(%{q: "race"})}")
|> json_response_and_validate_schema(200)
- assert results["hashtags"] == [
- %{"name" => "text", "url" => "#{Endpoint.url()}/tag/text"},
- %{"name" => "with", "url" => "#{Endpoint.url()}/tag/with"}
- ]
+ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
+
+ assert "racecar" in hashtag_names
+ refute "car" in hashtag_names
+ refute "nascar" in hashtag_names
end
test "excludes a blocked users from search results", %{conn: conn} do
@@ -314,7 +322,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
[account | _] = results["accounts"]
assert account["id"] == to_string(user_three.id)
- assert results["hashtags"] == ["2hu"]
+ assert results["hashtags"] == []
[status] = results["statuses"]
assert status["id"] == to_string(activity.id)