microblog.pub/app/prune.py

118 lines
4.1 KiB
Python
Raw Normal View History

from datetime import timedelta
from loguru import logger
from sqlalchemy import and_
from sqlalchemy import delete
2022-08-19 12:50:56 +00:00
from sqlalchemy import func
from sqlalchemy import not_
2022-08-18 22:03:49 +00:00
from sqlalchemy import or_
2022-08-19 12:50:56 +00:00
from sqlalchemy import select
from app import activitypub as ap
from app import models
from app.config import BASE_URL
from app.config import INBOX_RETENTION_DAYS
from app.database import AsyncSession
from app.database import async_session
from app.utils.datetime import now
async def prune_old_data(
    db_session: AsyncSession,
) -> None:
    """Prune expired activities and inbox objects, then compact the database.

    Runs the three pruning passes (incoming activities, outgoing activities,
    inbox objects), persists them with a single commit and finally issues a
    ``VACUUM`` statement.

    Args:
        db_session: Session used for every statement; it is committed here.
    """
    # Function-scope import: `text` is only needed for the raw statement below.
    from sqlalchemy import text

    logger.info(f"Pruning old data with {INBOX_RETENTION_DAYS=}")

    await _prune_old_incoming_activities(db_session)
    await _prune_old_outgoing_activities(db_session)
    await _prune_old_inbox_objects(db_session)

    # TODO: delete actor with no remaining inbox objects

    await db_session.commit()

    # Reclaim disk space. Issued after the commit so the deletions above are
    # already persisted. Raw SQL must be wrapped in `text()`: passing a plain
    # string to `execute()` is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
    await db_session.execute(text("VACUUM"))
async def _prune_old_incoming_activities(
    db_session: AsyncSession,
) -> None:
    """Delete incoming activities created before the retention cutoff.

    Activities flagged as errored are never deleted, so they stay available
    for debugging. The number of deleted rows is logged.

    Args:
        db_session: Session the DELETE is executed on (not committed here).
    """
    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)

    stmt = delete(models.IncomingActivity).where(
        models.IncomingActivity.created_at < cutoff,
        # Keep failed activity for debug
        models.IncomingActivity.is_errored.is_(False),
    )
    # Bulk delete: do not try to synchronize objects loaded in the session
    stmt = stmt.execution_options(synchronize_session=False)

    outcome = await db_session.execute(stmt)
    logger.info(f"Deleted {outcome.rowcount} old incoming activities")  # type: ignore
2022-08-19 12:50:56 +00:00
async def _prune_old_outgoing_activities(
    db_session: AsyncSession,
) -> None:
    """Delete outgoing activities created before the retention cutoff.

    Activities flagged as errored are never deleted, so they stay available
    for debugging. The number of deleted rows is logged.

    Args:
        db_session: Session the DELETE is executed on (not committed here).
    """
    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)

    stmt = delete(models.OutgoingActivity).where(
        models.OutgoingActivity.created_at < cutoff,
        # Keep failed activity for debug
        models.OutgoingActivity.is_errored.is_(False),
    )
    # Bulk delete: do not try to synchronize objects loaded in the session
    stmt = stmt.execution_options(synchronize_session=False)

    outcome = await db_session.execute(stmt)
    logger.info(f"Deleted {outcome.rowcount} old outgoing activities")  # type: ignore
async def _prune_old_inbox_objects(
    db_session: AsyncSession,
) -> None:
    """Delete inbox objects published before the retention cutoff.

    A row is deleted only when *every* condition of the WHERE clause holds, so
    each condition doubles as a "keep" rule for the rows it rejects:
    bookmarked, liked or announced objects, objects mentioning the local
    actor, objects belonging to a conversation the local actor takes part in,
    activities targeting a local object, and direct-message notes are all
    preserved. The number of deleted rows is logged.

    Args:
        db_session: Session the DELETE is executed on (not committed here).
    """
    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)
    # XXX: no `/` here because the local ID does not have one
    local_prefix = f"{BASE_URL}%"

    # Conversations that do not start with the local prefix but contain at
    # least one outbox object.
    outbox_conversation = select(func.distinct(models.OutboxObject.conversation)).where(
        models.OutboxObject.conversation.is_not(None),
        models.OutboxObject.conversation.not_like(local_prefix),
    )

    result = await db_session.execute(
        delete(models.InboxObject)
        .where(
            # Keep bookmarked objects
            models.InboxObject.is_bookmarked.is_(False),
            # Keep liked objects
            models.InboxObject.liked_via_outbox_object_ap_id.is_(None),
            # Keep announced objects
            models.InboxObject.announced_via_outbox_object_ap_id.is_(None),
            # Keep objects mentioning the local actor
            models.InboxObject.has_local_mention.is_(False),
            # Keep objects related to local conversations (i.e. don't break the
            # public website): an object is only prunable when it has no
            # conversation, or when its conversation is neither local nor one
            # the outbox participates in. The two "not" tests must be AND-ed:
            # OR-ing them is always true, because `outbox_conversation` only
            # holds non-local conversations.
            or_(
                models.InboxObject.conversation.is_(None),
                and_(
                    models.InboxObject.conversation.not_like(local_prefix),
                    models.InboxObject.conversation.not_in(outbox_conversation),
                ),
            ),
            # Keep activities related to the outbox (like Like/Announce/Follow...)
            or_(
                models.InboxObject.activity_object_ap_id.not_like(local_prefix),
                models.InboxObject.activity_object_ap_id.is_(None),
            ),
            # Keep direct messages
            not_(
                and_(
                    models.InboxObject.visibility == ap.VisibilityEnum.DIRECT,
                    models.InboxObject.ap_type.in_(["Note"]),
                )
            ),
            # Filter by retention days
            models.InboxObject.ap_published_at < cutoff,
        )
        # Bulk delete: do not try to synchronize objects loaded in the session
        .execution_options(synchronize_session=False)
    )
    logger.info(f"Deleted {result.rowcount} old inbox objects")  # type: ignore
async def run_prune_old_data() -> None:
    """CLI entrypoint: open a database session and prune old data with it."""
    async with async_session() as session:
        await prune_old_data(session)