forked from forks/microblog.pub
Start support for pruning old inbox data
This commit is contained in:
parent
08618c3c72
commit
0ffacca796
3 changed files with 89 additions and 0 deletions
|
@ -72,6 +72,8 @@ class Config(pydantic.BaseModel):
|
|||
code_highlighting_theme = "friendly_grayscale"
|
||||
blocked_servers: list[_BlockedServer] = []
|
||||
|
||||
inbox_retention_days: int = 15
|
||||
|
||||
# Config items to make tests easier
|
||||
sqlalchemy_database: str | None = None
|
||||
key_path: str | None = None
|
||||
|
@ -118,6 +120,8 @@ if CONFIG.privacy_replace:
|
|||
|
||||
BLOCKED_SERVERS = {blocked_server.hostname for blocked_server in CONFIG.blocked_servers}
|
||||
|
||||
INBOX_RETENTION_DAYS = CONFIG.inbox_retention_days
|
||||
|
||||
BASE_URL = ID
|
||||
DEBUG = CONFIG.debug
|
||||
DB_PATH = CONFIG.sqlalchemy_database or ROOT_DIR / "data" / "microblogpub.db"
|
||||
|
|
77
app/prune.py
Normal file
77
app/prune.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
from datetime import timedelta

from loguru import logger
from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import not_
from sqlalchemy import or_
from sqlalchemy import text

from app import activitypub as ap
from app import models
from app.config import BASE_URL
from app.config import INBOX_RETENTION_DAYS
from app.database import AsyncSession
from app.database import async_session
from app.utils.datetime import now
|
||||
|
||||
|
||||
async def prune_old_data(
    db_session: AsyncSession,
) -> None:
    """Delete inbox data older than the configured retention window.

    Runs both pruning passes (incoming activities, then inbox objects),
    commits, and finally reclaims disk space.
    """
    logger.info(f"Pruning old data with {INBOX_RETENTION_DAYS=}")
    await _prune_old_incoming_activities(db_session)
    await _prune_old_inbox_objects(db_session)

    await db_session.commit()
    # Reclaim disk space. Wrap the raw SQL in text(): passing a plain string
    # to execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0.
    await db_session.execute(text("VACUUM"))
|
||||
|
||||
|
||||
async def _prune_old_incoming_activities(
    db_session: AsyncSession,
) -> None:
    """Delete processed incoming activities past the retention window."""
    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)
    stmt = (
        delete(models.IncomingActivity)
        .where(
            models.IncomingActivity.created_at < cutoff,
            # Keep failed activity for debug
            models.IncomingActivity.is_errored.is_(False),
        )
        .execution_options(synchronize_session=False)
    )
    result = await db_session.execute(stmt)
    logger.info(f"Deleted {result.rowcount} old incoming activities")  # type: ignore
|
||||
|
||||
|
||||
async def _prune_old_inbox_objects(
    db_session: AsyncSession,
) -> None:
    """Delete old inbox objects, keeping anything the owner interacted with.

    Objects are kept when they are bookmarked, liked, announced, part of a
    local conversation, or direct messages; everything else older than
    INBOX_RETENTION_DAYS is removed.
    """
    result = await db_session.execute(
        delete(models.InboxObject)
        .where(
            # Keep bookmarked objects
            models.InboxObject.is_bookmarked.is_(False),
            # Keep liked objects
            models.InboxObject.liked_via_outbox_object_ap_id.is_(None),
            # Keep announced objects
            models.InboxObject.announced_via_outbox_object_ap_id.is_(None),
            # Keep objects related to local conversations. NOT LIKE evaluates
            # to SQL NULL (not TRUE) for a NULL conversation, which would
            # silently keep every object without a conversation forever, so
            # allow NULL conversations to be pruned explicitly.
            or_(
                models.InboxObject.conversation.is_(None),
                models.InboxObject.conversation.not_like(f"{BASE_URL}/%"),
            ),
            # Keep direct messages
            not_(
                and_(
                    models.InboxObject.visibility == ap.VisibilityEnum.DIRECT,
                    models.InboxObject.ap_type.in_(["Note"]),
                )
            ),
            # Filter by retention days
            models.InboxObject.ap_published_at
            < now() - timedelta(days=INBOX_RETENTION_DAYS),
        )
        .execution_options(synchronize_session=False)
    )
    logger.info(f"Deleted {result.rowcount} old inbox objects")  # type: ignore
|
||||
|
||||
|
||||
async def run_prune_old_data() -> None:
    """Open a fresh database session and run the pruning job in it."""
    session_cm = async_session()
    async with session_cm as db_session:
        await prune_old_data(db_session)
|
8
tasks.py
8
tasks.py
|
@ -181,3 +181,11 @@ def build_docker_image(ctx):
|
|||
# type: (Context) -> None
|
||||
with embed_version():
|
||||
run("docker build -t microblogpub/microblogpub .")
|
||||
|
||||
|
||||
@task
def prune_old_data(ctx):
    # type: (Context) -> None
    """Invoke task: delete inbox data past the configured retention window.

    Thin synchronous wrapper that drives the async pruning job in
    app/prune.py; imported lazily so plain tasks don't load the app.
    """
    from app.prune import run_prune_old_data

    asyncio.run(run_prune_old_data())
|
||||
|
|
Loading…
Reference in a new issue