From 225f87ad62902cce8db71d30c23801fd9ed7ee05 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 23 Oct 2023 17:29:02 +0200 Subject: [PATCH] Also allow limiting the initial prune_object May sometimes be helpful to get more predictable runtime than just with an age-based limit. The subquery for the non-keep-threads path is required since delete_all does not directly accept limit(). Again, most of the diff is just adjusting indentation, best hide whitespace-only changes with git diff -w or similar. --- CHANGELOG.md | 1 + .../docs/administration/CLI_tasks/database.md | 1 + lib/mix/tasks/pleroma/database.ex | 61 +++++++++++++------ 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42411f491..aea6dc677 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -107,6 +107,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - ability to auto-approve follow requests from users you are already following - The SimplePolicy MRF can now strip user backgrounds from selected remote hosts - New standalone `prune_orphaned_activities` mix task with configurable batch limit +- The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning ## Changed - OTP builds are now built on erlang OTP26 diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index c57817bf4..bbf29fc60 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -50,6 +50,7 @@ This will prune remote posts older than 90 days (configurable with [`config :ple - `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. 
because one of the posts has seen recent activity). - `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote. +- `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task. - `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size. - `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning. diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 083f73fe2..b8f19551a 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -20,6 +20,14 @@ defmodule Mix.Tasks.Pleroma.Database do @shortdoc "A collection of database related tasks" @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md") + defp maybe_limit(query, limit_cnt) do + if is_number(limit_cnt) and limit_cnt > 0 do + limit(query, [], ^limit_cnt) + else + query + end + end + def prune_orphaned_activities(limit \\ 0) when is_number(limit) do limit_arg = if limit > 0 do @@ -148,7 +156,8 @@ def run(["prune_objects" | args]) do vacuum: :boolean, keep_threads: :boolean, keep_non_public: :boolean, - prune_orphaned_activities: :boolean + prune_orphaned_activities: :boolean, + limit: :integer ] ) @@ -157,6 +166,8 @@ def run(["prune_objects" | args]) do deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400)) + limit_cnt = Keyword.get(options, :limit, 0) + log_message = "Pruning 
objects older than #{deadline} days" log_message = @@ -188,6 +199,13 @@ def run(["prune_objects" | args]) do log_message end + log_message = + if limit_cnt > 0 do + log_message <> ", limiting to #{limit_cnt} rows" + else + log_message + end + Logger.info(log_message) if Keyword.get(options, :keep_threads) do @@ -221,31 +239,38 @@ def run(["prune_objects" | args]) do |> having([a], max(a.updated_at) < ^time_deadline) |> having([a], not fragment("bool_or(?)", a.local)) |> having([_, b], fragment("max(?::text) is null", b.id)) + |> maybe_limit(limit_cnt) |> select([a], fragment("? ->> 'context'::text", a.data)) Pleroma.Object |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) else - if Keyword.get(options, :keep_non_public) do - Pleroma.Object + deletable = + if Keyword.get(options, :keep_non_public) do + Pleroma.Object + |> where( + [o], + fragment( + "?->'to' \\? ? OR ?->'cc' \\? ?", + o.data, + ^Pleroma.Constants.as_public(), + o.data, + ^Pleroma.Constants.as_public() + ) + ) + else + Pleroma.Object + end + |> where([o], o.updated_at < ^time_deadline) |> where( [o], - fragment( - "?->'to' \\? ? OR ?->'cc' \\? ?", - o.data, - ^Pleroma.Constants.as_public(), - o.data, - ^Pleroma.Constants.as_public() - ) + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) - else - Pleroma.Object - end - |> where([o], o.updated_at < ^time_deadline) - |> where( - [o], - fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) - ) + |> maybe_limit(limit_cnt) + |> select([o], o.id) + + Pleroma.Object + |> where([o], o.id in subquery(deletable)) end |> Repo.delete_all(timeout: :infinity)