logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma
commit: d2dacadb6b223449d8eab3d7a32e29da9f2adde7
parent: e5b34f5e0544371603bc2b570c26ede3182c2f8b
Author: rinpatch <rinpatch@sdf.org>
Date:   Fri, 17 May 2019 16:35:19 +0000

Merge branch 'rum-index' into 'develop'

Search: Use RUM index.

See merge request pleroma/pleroma!1136

Diffstat:

M.gitlab-ci.yml19+++++++++++++++++--
Mconfig/config.exs2++
Mconfig/test.exs4++++
Mdocs/config.md15+++++++++++++++
Mlib/pleroma/web/mastodon_api/mastodon_api_controller.ex43+++++++++++++++++++++++++++++++++----------
Apriv/repo/optional_migrations/rum_indexing/20190510135645_add_fts_index_to_objects_two.exs34++++++++++++++++++++++++++++++++++
6 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml @@ -45,7 +45,8 @@ docs-build: unit-testing: stage: test services: - - name: postgres:9.6.2 + - name: lainsoykaf/postgres-with-rum + alias: postgres command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"] script: - mix deps.get @@ -54,6 +55,21 @@ unit-testing: - mix test --trace --preload-modules - mix coveralls +unit-testing-rum: + stage: test + services: + - name: lainsoykaf/postgres-with-rum + alias: postgres + command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"] + variables: + RUM_ENABLED: "true" + script: + - mix deps.get + - mix ecto.create + - mix ecto.migrate + - "mix ecto.migrate --migrations-path priv/repo/optional_migrations/rum_indexing/" + - mix test --trace --preload-modules + lint: stage: test script: @@ -65,7 +81,6 @@ analysis: - mix deps.get - mix credo --strict --only=warnings,todo,fixme,consistency,readability - docs-deploy: stage: deploy image: alpine:3.9 diff --git a/config/config.exs b/config/config.exs @@ -465,6 +465,8 @@ config :pleroma, :oauth2, token_expires_in: 600, issue_new_refresh_token: true +config :pleroma, :database, rum_enabled: false + config :http_signatures, adapter: Pleroma.Signature diff --git a/config/test.exs b/config/test.exs @@ -63,6 +63,10 @@ config :pleroma, :app_account_creation, max_requests: 5 config :pleroma, :http_security, report_uri: "https://endpoint.com" +rum_enabled = System.get_env("RUM_ENABLED") == "true" +config :pleroma, :database, rum_enabled: rum_enabled +IO.puts("RUM enabled: #{rum_enabled}") + try do import_config "test.secret.exs" rescue diff --git a/docs/config.md b/docs/config.md @@ -544,3 +544,18 @@ Configure OAuth 2 provider capabilities: * `shortcode_globs`: Location of custom emoji files. `*` can be used as a wildcard. Example `["/emoji/custom/**/*.png"]` * `groups`: Emojis are ordered in groups (tags). This is an array of key-value pairs where the key is the groupname and the value the location or array of locations. `*` can be used as a wildcard. Example `[Custom: ["/emoji/*.png", "/emoji/custom/*.png"]]` * `default_manifest`: Location of the JSON-manifest. This manifest contains information about the emoji-packs you can download. Currently only one manifest can be added (no arrays). + +## Database options + +### RUM indexing for full text search +* `rum_enabled`: If RUM indexes should be used. Defaults to `false`. + +RUM indexes are an alternative indexing scheme that is not included in PostgreSQL by default. While they may eventually be mainlined, for now they have to be installed as a PostgreSQL extension from https://github.com/postgrespro/rum. + +Their advantage over the standard GIN indexes is that they allow efficient ordering of search results by timestamp, which makes search queries a lot faster on larger servers, by one or two orders of magnitude. They take up around 3 times as much space as GIN indexes. + +To enable them, both the `rum_enabled` flag has to be set and the following special migration has to be run: + +`mix ecto.migrate --migrations-path priv/repo/optional_migrations/rum_indexing/` + +This will probably take a long time. diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex @@ -1009,6 +1009,30 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end end + def status_search_query_with_gin(q, query) do + from([a, o] in q, + where: + fragment( + "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", + o.data, + ^query + ), + order_by: [desc: :id] + ) + end + + def status_search_query_with_rum(q, query) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery('english', ?)", + o.fts_content, + ^query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + def status_search(user, query) do fetched = if Regex.match?(~r/https?:/, query) do @@ -1022,20 +1046,19 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do end || [] q = - from( - [a, o] in Activity.with_preloaded_object(Activity), + from([a, o] in Activity.with_preloaded_object(Activity), where: fragment("?->>'type' = 'Create'", a.data), where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, - where: - fragment( - "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", - o.data, - ^query - ), - limit: 20, - order_by: [desc: :id] + limit: 20 ) + q = + if Pleroma.Config.get([:database, :rum_enabled]) do + status_search_query_with_rum(q, query) + else + status_search_query_with_gin(q, query) + end + Repo.all(q) ++ fetched end diff --git a/priv/repo/optional_migrations/rum_indexing/20190510135645_add_fts_index_to_objects_two.exs b/priv/repo/optional_migrations/rum_indexing/20190510135645_add_fts_index_to_objects_two.exs @@ -0,0 +1,34 @@ +defmodule Pleroma.Repo.Migrations.AddFtsIndexToObjectsTwo do + use Ecto.Migration + + def up do + execute("create extension if not exists rum") + drop_if_exists index(:objects, ["(to_tsvector('english', data->>'content'))"], using: :gin, name: :objects_fts) + alter table(:objects) do + add(:fts_content, :tsvector) + end + + execute("CREATE FUNCTION objects_fts_update() RETURNS trigger AS $$ + begin + new.fts_content := to_tsvector('english', new.data->>'content'); + return new; + end + $$ LANGUAGE plpgsql") + execute("create index objects_fts on objects using RUM (fts_content rum_tsvector_addon_ops, inserted_at) with (attach = 'inserted_at', to = 'fts_content');") + + execute("CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE ON objects + FOR EACH ROW EXECUTE PROCEDURE objects_fts_update()") + + execute("UPDATE objects SET updated_at = NOW()") + end + + def down do + execute "drop index objects_fts" + execute "drop trigger tsvectorupdate on objects" + execute "drop function objects_fts_update()" + alter table(:objects) do + remove(:fts_content, :tsvector) + end + create index(:objects, ["(to_tsvector('english', data->>'content'))"], using: :gin, name: :objects_fts) + end +end