mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-12-22 13:14:28 +00:00
Start support for pruning old inbox data
This commit is contained in:
parent
08618c3c72
commit
0ffacca796
3 changed files with 89 additions and 0 deletions
|
@ -72,6 +72,8 @@ class Config(pydantic.BaseModel):
|
||||||
code_highlighting_theme = "friendly_grayscale"
|
code_highlighting_theme = "friendly_grayscale"
|
||||||
blocked_servers: list[_BlockedServer] = []
|
blocked_servers: list[_BlockedServer] = []
|
||||||
|
|
||||||
|
# Retention window in days (default 15); exposed as INBOX_RETENTION_DAYS and
# used as the age cutoff when pruning old inbox data.
inbox_retention_days: int = 15
|
||||||
|
|
||||||
# Config items to make tests easier
|
# Config items to make tests easier
|
||||||
sqlalchemy_database: str | None = None
|
sqlalchemy_database: str | None = None
|
||||||
key_path: str | None = None
|
key_path: str | None = None
|
||||||
|
@ -118,6 +120,8 @@ if CONFIG.privacy_replace:
|
||||||
|
|
||||||
BLOCKED_SERVERS = {blocked_server.hostname for blocked_server in CONFIG.blocked_servers}
|
BLOCKED_SERVERS = {blocked_server.hostname for blocked_server in CONFIG.blocked_servers}
|
||||||
|
|
||||||
|
# Number of days of inbox data to keep, taken from the `inbox_retention_days`
# config field.
INBOX_RETENTION_DAYS = CONFIG.inbox_retention_days
|
||||||
|
|
||||||
BASE_URL = ID
|
BASE_URL = ID
|
||||||
DEBUG = CONFIG.debug
|
DEBUG = CONFIG.debug
|
||||||
DB_PATH = CONFIG.sqlalchemy_database or ROOT_DIR / "data" / "microblogpub.db"
|
DB_PATH = CONFIG.sqlalchemy_database or ROOT_DIR / "data" / "microblogpub.db"
|
||||||
|
|
77
app/prune.py
Normal file
77
app/prune.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
from sqlalchemy import and_
|
||||||
|
from sqlalchemy import delete
|
||||||
|
from sqlalchemy import not_
|
||||||
|
|
||||||
|
from app import activitypub as ap
|
||||||
|
from app import models
|
||||||
|
from app.config import BASE_URL
|
||||||
|
from app.config import INBOX_RETENTION_DAYS
|
||||||
|
from app.database import AsyncSession
|
||||||
|
from app.database import async_session
|
||||||
|
from app.utils.datetime import now
|
||||||
|
|
||||||
|
|
||||||
|
async def prune_old_data(
    db_session: AsyncSession,
) -> None:
    """Delete expired inbox data, commit, then compact the database.

    Runs the incoming-activity and inbox-object pruning passes on
    ``db_session``, commits the deletions and finally issues a ``VACUUM``.

    Args:
        db_session: Session used for the deletes, the commit and the VACUUM.
    """
    # Imported locally so this function is self-contained with respect to
    # the module-level import block.
    from sqlalchemy import text

    logger.info(f"Pruning old data with {INBOX_RETENTION_DAYS=}")
    await _prune_old_incoming_activities(db_session)
    await _prune_old_inbox_objects(db_session)

    await db_session.commit()
    # Reclaim disk space. The raw SQL is wrapped in text() because passing a
    # plain string to execute() is rejected by SQLAlchemy 2.0; text() is
    # accepted by both 1.4 and 2.0 and needs no type-checker suppression.
    await db_session.execute(text("VACUUM"))
|
||||||
|
|
||||||
|
|
||||||
|
async def _prune_old_incoming_activities(
    db_session: AsyncSession,
) -> None:
    """Delete incoming activities older than the retention window.

    Rows created more than ``INBOX_RETENTION_DAYS`` days ago are removed,
    except those flagged ``is_errored``. The number of deleted rows is logged.

    Args:
        db_session: Session the DELETE statement is executed on.
    """
    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)
    is_expired = models.IncomingActivity.created_at < cutoff
    # Keep failed activity for debug
    is_not_errored = models.IncomingActivity.is_errored.is_(False)

    stmt = (
        delete(models.IncomingActivity)
        .where(is_expired, is_not_errored)
        .execution_options(synchronize_session=False)
    )
    result = await db_session.execute(stmt)
    logger.info(f"Deleted {result.rowcount} old incoming activities")  # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
async def _prune_old_inbox_objects(
    db_session: AsyncSession,
) -> None:
    """Delete inbox objects published before the retention cutoff.

    An object is deleted only when ALL of the following hold: it is not
    bookmarked, not liked, not announced, not part of a local conversation,
    not a direct-message Note, and its ``ap_published_at`` is older than
    ``INBOX_RETENTION_DAYS`` days. The number of deleted rows is logged.

    Args:
        db_session: Session the DELETE statement is executed on.
    """
    # Imported locally so this function is self-contained with respect to
    # the module-level import block.
    from sqlalchemy import or_

    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)
    result = await db_session.execute(
        delete(models.InboxObject)
        .where(
            # Keep bookmarked objects
            models.InboxObject.is_bookmarked.is_(False),
            # Keep liked objects
            models.InboxObject.liked_via_outbox_object_ap_id.is_(None),
            # Keep announced objects
            models.InboxObject.announced_via_outbox_object_ap_id.is_(None),
            # Keep objects related to local conversations.
            # `NULL NOT LIKE ...` evaluates to NULL in SQL, so without the
            # explicit IS NULL branch an object with no conversation would
            # never match and would be retained forever.
            or_(
                models.InboxObject.conversation.is_(None),
                models.InboxObject.conversation.not_like(f"{BASE_URL}/%"),
            ),
            # Keep direct messages
            not_(
                and_(
                    models.InboxObject.visibility == ap.VisibilityEnum.DIRECT,
                    models.InboxObject.ap_type.in_(["Note"]),
                )
            ),
            # Filter by retention days
            models.InboxObject.ap_published_at < cutoff,
        )
        .execution_options(synchronize_session=False)
    )
    logger.info(f"Deleted {result.rowcount} old inbox objects")  # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
async def run_prune_old_data() -> None:
    """Open a database session and run ``prune_old_data`` with it."""
    session_ctx = async_session()
    async with session_ctx as db_session:
        await prune_old_data(db_session)
|
8
tasks.py
8
tasks.py
|
@ -181,3 +181,11 @@ def build_docker_image(ctx):
|
||||||
# type: (Context) -> None
|
# type: (Context) -> None
|
||||||
with embed_version():
|
with embed_version():
|
||||||
run("docker build -t microblogpub/microblogpub .")
|
run("docker build -t microblogpub/microblogpub .")
|
||||||
|
|
||||||
|
|
||||||
|
@task
def prune_old_data(ctx):
    # type: (Context) -> None
    """Run the pruning coroutine from ``app.prune`` to completion."""
    from app import prune

    asyncio.run(prune.run_prune_old_data())
|
||||||
|
|
Loading…
Reference in a new issue