logo

pleroma

My custom branche(s) on git.pleroma.social/pleroma/pleroma git clone https://hacktivis.me/git/pleroma.git
commit: 88c1c76d3eca3412d1e02008f1b8d96fe8fe0b96
parent bb02ee99f58e378e33162211f41fe5979d5da8ae
Author: Hélène <pleroma-dev@helene.moe>
Date:   Mon, 15 Aug 2022 01:15:23 +0200

Migrations: delete contexts with BaseMigrator

Due to the lengthiness of this task, the migration has been adapted into
a BaseMigrator migration, running in the background instead.

Diffstat:

Mconfig/config.exs2++
Mconfig/description.exs21+++++++++++++++++++++
Mlib/pleroma/application.ex3++-
Mlib/pleroma/data_migration.ex1+
Alib/pleroma/migrators/context_objects_deletion_migrator.ex139+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mpriv/repo/migrations/20220807125023_data_migration_delete_context_objects.exs13++++++++-----
6 files changed, 173 insertions(+), 6 deletions(-)

diff --git a/config/config.exs b/config/config.exs @@ -673,6 +673,8 @@ config :pleroma, :features, improved_hashtag_timeline: :auto config :pleroma, :populate_hashtags_table, fault_rate_allowance: 0.01 +config :pleroma, :delete_context_objects, fault_rate_allowance: 0.01 + config :pleroma, :env, Mix.env() config :http_signatures, diff --git a/config/description.exs b/config/description.exs @@ -497,6 +497,27 @@ config :pleroma, :config_description, [ }, %{ group: :pleroma, + key: :delete_context_objects, + type: :group, + description: "`delete_context_objects` background migration settings", + children: [ + %{ + key: :fault_rate_allowance, + type: :float, + description: + "Max accepted rate of objects that failed in the migration. Any value from 0.0 which tolerates no errors to 1.0 which will enable the feature even if context object deletion failed for all records.", + suggestions: [0.01] + }, + %{ + key: :sleep_interval_ms, + type: :integer, + description: + "Sleep interval between each chunk of processed records in order to decrease the load on the system (defaults to 0 and should be keep default on most instances)." + } + ] + }, + %{ + group: :pleroma, key: :instance, type: :group, description: "Instance-related settings", diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex @@ -238,7 +238,8 @@ defmodule Pleroma.Application do defp background_migrators do [ - Pleroma.Migrators.HashtagsTableMigrator + Pleroma.Migrators.HashtagsTableMigrator, + Pleroma.Migrators.ContextObjectsDeletionMigrator ] end diff --git a/lib/pleroma/data_migration.ex b/lib/pleroma/data_migration.ex @@ -42,4 +42,5 @@ defmodule Pleroma.DataMigration do end def populate_hashtags_table, do: get_by_name("populate_hashtags_table") + def delete_context_objects, do: get_by_name("delete_context_objects") end diff --git a/lib/pleroma/migrators/context_objects_deletion_migrator.ex b/lib/pleroma/migrators/context_objects_deletion_migrator.ex @@ -0,0 +1,139 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Migrators.ContextObjectsDeletionMigrator do + defmodule State do + use Pleroma.Migrators.Support.BaseMigratorState + + @impl Pleroma.Migrators.Support.BaseMigratorState + defdelegate data_migration(), to: Pleroma.DataMigration, as: :delete_context_objects + end + + use Pleroma.Migrators.Support.BaseMigrator + + alias Pleroma.Migrators.Support.BaseMigrator + alias Pleroma.Object + + @doc "This migration removes objects created exclusively for contexts, containing only an `id` field." + + @impl BaseMigrator + def feature_config_path, do: [:features, :delete_context_objects] + + @impl BaseMigrator + def fault_rate_allowance, do: Config.get([:delete_context_objects, :fault_rate_allowance], 0) + + @impl BaseMigrator + def perform do + data_migration_id = data_migration_id() + max_processed_id = get_stat(:max_processed_id, 0) + + Logger.info("Deleting context objects from `objects` (from oid: #{max_processed_id})...") + + query() + |> where([object], object.id > ^max_processed_id) + |> Repo.chunk_stream(100, :batches, timeout: :infinity) + |> Stream.each(fn objects -> + object_ids = Enum.map(objects, & &1.id) + + results = Enum.map(object_ids, &delete_context_object(&1)) + + failed_ids = + results + |> Enum.filter(&(elem(&1, 0) == :error)) + |> Enum.map(&elem(&1, 1)) + + chunk_affected_count = + results + |> Enum.filter(&(elem(&1, 0) == :ok)) + |> length() + + for failed_id <- failed_ids do + _ = + Repo.query( + "INSERT INTO data_migration_failed_ids(data_migration_id, record_id) " <> + "VALUES ($1, $2) ON CONFLICT DO NOTHING;", + [data_migration_id, failed_id] + ) + end + + _ = + Repo.query( + "DELETE FROM data_migration_failed_ids " <> + "WHERE data_migration_id = $1 AND record_id = ANY($2)", + [data_migration_id, object_ids -- failed_ids] + ) + + max_object_id = Enum.at(object_ids, -1) + + put_stat(:max_processed_id, max_object_id) + increment_stat(:iteration_processed_count, length(object_ids)) + increment_stat(:processed_count, length(object_ids)) + increment_stat(:failed_count, length(failed_ids)) + increment_stat(:affected_count, chunk_affected_count) + put_stat(:records_per_second, records_per_second()) + persist_state() + + # A quick and dirty approach to controlling the load this background migration imposes + sleep_interval = Config.get([:delete_context_objects, :sleep_interval_ms], 0) + Process.sleep(sleep_interval) + end) + |> Stream.run() + end + + @impl BaseMigrator + def query do + # Context objects have no activity type, and only one field, `id`. + # Only those context objects are without types. + from( + object in Object, + where: fragment("(?)->'type' IS NULL", object.data), + select: %{ + id: object.id + } + ) + end + + @spec delete_context_object(integer()) :: {:ok | :error, integer()} + defp delete_context_object(id) do + result = + %Object{id: id} + |> Repo.delete() + |> elem(0) + + {result, id} + end + + @impl BaseMigrator + def retry_failed do + data_migration_id = data_migration_id() + + failed_objects_query() + |> Repo.chunk_stream(100, :one) + |> Stream.each(fn object -> + with {res, _} when res != :error <- delete_context_object(object.id) do + _ = + Repo.query( + "DELETE FROM data_migration_failed_ids " <> + "WHERE data_migration_id = $1 AND record_id = $2", + [data_migration_id, object.id] + ) + end + end) + |> Stream.run() + + put_stat(:failed_count, failures_count()) + persist_state() + + force_continue() + end + + defp failed_objects_query do + from(o in Object) + |> join(:inner, [o], dmf in fragment("SELECT * FROM data_migration_failed_ids"), + on: dmf.record_id == o.id + ) + |> where([_o, dmf], dmf.data_migration_id == ^data_migration_id()) + |> order_by([o], asc: o.id) + end +end diff --git a/priv/repo/migrations/20220807125023_data_migration_delete_context_objects.exs b/priv/repo/migrations/20220807125023_data_migration_delete_context_objects.exs @@ -3,13 +3,16 @@ defmodule Pleroma.Repo.Migrations.DataMigrationDeleteContextObjects do require Logger - @doc "This migration removes objects created exclusively for contexts, containing only an `id` field." + def up do + dt = NaiveDateTime.utc_now() - def change do - Logger.warn( - "This migration can take a very long time to execute, depending on your database size. Please be patient, Pleroma-tan is doing her best!\n" + execute( + "INSERT INTO data_migrations(name, inserted_at, updated_at) " <> + "VALUES ('delete_context_objects', '#{dt}', '#{dt}') ON CONFLICT DO NOTHING;" ) + end - execute("DELETE FROM objects WHERE (data->>'type') IS NULL;") + def down do + execute("DELETE FROM data_migrations WHERE name = 'delete_context_objects';") end end