From 0ffacca796681f1ee3019f56b06e7e0933d57536 Mon Sep 17 00:00:00 2001
From: Thomas Sileo
Date: Thu, 18 Aug 2022 23:48:00 +0200
Subject: [PATCH] Start support for pruning old inbox data

---
 app/config.py |  4 +++
 app/prune.py  | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tasks.py      |  8 ++++++
 3 files changed, 89 insertions(+)
 create mode 100644 app/prune.py

diff --git a/app/config.py b/app/config.py
index 9df50a9..4e0b5ba 100644
--- a/app/config.py
+++ b/app/config.py
@@ -72,6 +72,8 @@ class Config(pydantic.BaseModel):
     code_highlighting_theme = "friendly_grayscale"
     blocked_servers: list[_BlockedServer] = []
 
+    inbox_retention_days: int = 15
+
     # Config items to make tests easier
     sqlalchemy_database: str | None = None
     key_path: str | None = None
@@ -118,6 +120,8 @@ if CONFIG.privacy_replace:
 
 BLOCKED_SERVERS = {blocked_server.hostname for blocked_server in CONFIG.blocked_servers}
 
+INBOX_RETENTION_DAYS = CONFIG.inbox_retention_days
+
 BASE_URL = ID
 DEBUG = CONFIG.debug
 DB_PATH = CONFIG.sqlalchemy_database or ROOT_DIR / "data" / "microblogpub.db"
diff --git a/app/prune.py b/app/prune.py
new file mode 100644
index 0000000..877b58f
--- /dev/null
+++ b/app/prune.py
@@ -0,0 +1,77 @@
+from datetime import timedelta
+
+from loguru import logger
+from sqlalchemy import and_
+from sqlalchemy import delete
+from sqlalchemy import not_
+
+from app import activitypub as ap
+from app import models
+from app.config import BASE_URL
+from app.config import INBOX_RETENTION_DAYS
+from app.database import AsyncSession
+from app.database import async_session
+from app.utils.datetime import now
+
+
+async def prune_old_data(
+    db_session: AsyncSession,
+) -> None:
+    logger.info(f"Pruning old data with {INBOX_RETENTION_DAYS=}")
+    await _prune_old_incoming_activities(db_session)
+    await _prune_old_inbox_objects(db_session)
+
+    await db_session.commit()
+    # Reclaim disk space
+    await db_session.execute("VACUUM")  # type: ignore
+
+
+async def _prune_old_incoming_activities(
+    db_session: AsyncSession,
+) -> None:
+    result = await db_session.execute(
+        delete(models.IncomingActivity)
+        .where(
+            models.IncomingActivity.created_at
+            < now() - timedelta(days=INBOX_RETENTION_DAYS),
+            # Keep failed activity for debug
+            models.IncomingActivity.is_errored.is_(False),
+        )
+        .execution_options(synchronize_session=False)
+    )
+    logger.info(f"Deleted {result.rowcount} old incoming activities")  # type: ignore
+
+
+async def _prune_old_inbox_objects(
+    db_session: AsyncSession,
+) -> None:
+    result = await db_session.execute(
+        delete(models.InboxObject)
+        .where(
+            # Keep bookmarked objects
+            models.InboxObject.is_bookmarked.is_(False),
+            # Keep liked objects
+            models.InboxObject.liked_via_outbox_object_ap_id.is_(None),
+            # Keep announced objects
+            models.InboxObject.announced_via_outbox_object_ap_id.is_(None),
+            # Keep objects related to local conversations
+            models.InboxObject.conversation.not_like(f"{BASE_URL}/%"),
+            # Keep direct messages
+            not_(
+                and_(
+                    models.InboxObject.visibility == ap.VisibilityEnum.DIRECT,
+                    models.InboxObject.ap_type.in_(["Note"]),
+                )
+            ),
+            # Filter by retention days
+            models.InboxObject.ap_published_at
+            < now() - timedelta(days=INBOX_RETENTION_DAYS),
+        )
+        .execution_options(synchronize_session=False)
+    )
+    logger.info(f"Deleted {result.rowcount} old inbox objects")  # type: ignore
+
+
+async def run_prune_old_data() -> None:
+    async with async_session() as db_session:
+        await prune_old_data(db_session)
diff --git a/tasks.py b/tasks.py
index e2e47b6..58f14f6 100644
--- a/tasks.py
+++ b/tasks.py
@@ -181,3 +181,11 @@ def build_docker_image(ctx):
     # type: (Context) -> None
     with embed_version():
         run("docker build -t microblogpub/microblogpub .")
+
+
+@task
+def prune_old_data(ctx):
+    # type: (Context) -> None
+    from app.prune import run_prune_old_data
+
+    asyncio.run(run_prune_old_data())
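
Usage sketch (not part of the patch above). The invoke task added to tasks.py
simply drives app.prune.run_prune_old_data, so the same prune pass can also be
run from a one-off Python script or a cron job; a minimal sketch, assuming it
is executed from the repository root so the "app" package and its database are
importable:

    # Hypothetical standalone runner, equivalent to the new invoke task.
    # It prunes using INBOX_RETENTION_DAYS from app/config.py (default: 15 days).
    import asyncio

    from app.prune import run_prune_old_data

    if __name__ == "__main__":
        asyncio.run(run_prune_old_data())

From the command line, the task should also be reachable as
"inv prune-old-data", since invoke maps underscores in task names to dashes by
default.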