forked from forks/microblog.pub
Cleanup
This commit is contained in:
parent 2180a79cf2
commit 48618c9694
2 changed files with 49 additions and 40 deletions
6 config.py
@@ -226,6 +226,7 @@ ME = {
     "publicKey": KEY.to_dict(),
 }
 
+# Default emojis, space-separated, update `me.yml` to customize emojis
 EMOJIS = "😺 😸 😹 😻 😼 😽 🙀 😿 😾"
 if conf.get("emojis"):
     EMOJIS = conf["emojis"]
@@ -235,5 +236,8 @@ EMOJI_TPL = '<img src="https://cdn.jsdelivr.net/npm/twemoji@12.0.0/2/svg/{filena
 if conf.get("emoji_tpl"):
     EMOJI_TPL = conf["emoji_tpl"]
 
-# Host blacklist
+# Hosts blacklist
 BLACKLIST = conf.get("blacklist", [])
+
+# By default, we keep 14 days of inbox data ; outbox is kept forever (along with bookmarked stuff, outbox replies, liked...)
+DAYS_TO_KEEP = 14
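The settings shown above all follow the same pattern: a module-level default that `conf`, the dict parsed from `me.yml`, may override. A minimal sketch of that pattern applied to the new retention window; the `days_to_keep` key is hypothetical and not part of this commit, which hard-codes the value:

    # Sketch only: config.py's override pattern applied to DAYS_TO_KEEP.
    # `conf` stands in for the dict parsed from me.yml; the "days_to_keep"
    # key is a hypothetical extension, not something this commit adds.
    conf = {"days_to_keep": 30}

    DAYS_TO_KEEP = 14  # default: keep two weeks of inbox data
    if conf.get("days_to_keep"):
        DAYS_TO_KEEP = int(conf["days_to_keep"])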
83 gc.py
@@ -1,10 +1,7 @@
 import logging
 from datetime import datetime
 from datetime import timedelta
-from typing import Any
-from typing import Dict
 from typing import List
-from urllib.parse import urlparse
 
 from little_boxes import activitypub as ap
 
@@ -13,9 +10,8 @@ from activitypub import Box
 from config import ID
 from config import ME
 from config import MEDIA_CACHE
+from config import DAYS_TO_KEEP
 from utils.migrations import DB
-from utils.migrations import Migration
-from utils.migrations import logger
 
 back = activitypub.MicroblogPubBackend()
 ap.use_backend(back)
@@ -50,8 +46,9 @@ def threads_of_interest() -> List[str]:
 
 
 def perform() -> None:
-    d = (datetime.utcnow() - timedelta(days=2)).strftime("%Y-%m-%d")
+    d = (datetime.utcnow() - timedelta(days=DAYS_TO_KEEP)).strftime("%Y-%m-%d")
     toi = threads_of_interest()
+    logger.info(f"thread_of_interest={toi!r}")
 
     # Go over the old Create activities
     for data in DB.activities.find(
@@ -60,41 +57,49 @@
             "type": ap.ActivityType.CREATE.value,
             "activity.published": {"$lt": d},
         }
-    ).limit(1000):
-        remote_id = data["remote_id"]
-        meta = data["meta"]
-        activity = ap.parse_activity(data["activity"])
-        logger.info(f"{activity}")
-
-        # This activity has been bookmarked, keep it
-        if meta.get("bookmarked"):
-            continue
-
-        # Inspect the object
-        obj = activity.get_object()
-
-        # This activity mentions the server actor, keep it
-        if obj.has_mention(ID):
-            continue
-
-        # This activity is a direct reply to one of the server actor's activities, keep it
-        in_reply_to = obj.get_in_reply_to()
-        if in_reply_to and in_reply_to.startswith(ID):
-            continue
-
-        # This activity is part of a thread we want to keep, keep it
-        if in_reply_to and meta.get("thread_root_parent"):
-            thread_root_parent = meta["thread_root_parent"]
-            if thread_root_parent.startswith(ID) or thread_root_parent in toi:
-                continue
-
-        # This activity was boosted or liked, keep it
-        if meta.get("boosted") or meta.get("liked"):
-            continue
-
-        # Delete the cached attachment
-        for grid_item in MEDIA_CACHE.fs.find({"remote_id": remote_id}):
-            MEDIA_CACHE.fs.delete(grid_item._id)
-
-        # Delete the activity
-        DB.activities.delete_one({"_id": data["_id"]})
+    ):
+        try:
+            remote_id = data["remote_id"]
+            meta = data["meta"]
+            activity = ap.parse_activity(data["activity"])
+            logger.info(f"activity={activity!r}")
+
+            # This activity has been bookmarked, keep it
+            if meta.get("bookmarked"):
+                continue
+
+            # Inspect the object
+            obj = activity.get_object()
+
+            # This activity mentions the server actor, keep it
+            if obj.has_mention(ID):
+                continue
+
+            # This activity is a direct reply to one of the server actor's activities, keep it
+            in_reply_to = obj.get_in_reply_to()
+            if in_reply_to and in_reply_to.startswith(ID):
+                continue
+
+            # This activity is part of a thread we want to keep, keep it
+            if in_reply_to and meta.get("thread_root_parent"):
+                thread_root_parent = meta["thread_root_parent"]
+                if thread_root_parent.startswith(ID) or thread_root_parent in toi:
+                    continue
+
+            # This activity was boosted or liked, keep it
+            if meta.get("boosted") or meta.get("liked"):
+                continue
+
+            # TODO(tsileo): remove after tests
+            if meta.get("keep"):
+                logger.warning(f"{activity!r} would not have been deleted, skipping for now")
+                continue
+
+            # Delete the cached attachment
+            for grid_item in MEDIA_CACHE.fs.find({"remote_id": remote_id}):
+                MEDIA_CACHE.fs.delete(grid_item._id)
+
+            # Delete the activity
+            DB.activities.delete_one({"_id": data["_id"]})
+        except Exception:
+            logger.exception(f"failed to process {data!r}")
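One subtlety in the query above: `activity.published` holds ISO 8601 timestamp strings, which sort lexicographically, so the `$lt` comparison against the bare `%Y-%m-%d` cutoff matches exactly the activities older than the retention window. A self-contained sketch of that comparison, outside the commit itself:

    from datetime import datetime, timedelta

    DAYS_TO_KEEP = 14  # same default the commit adds to config.py

    # ISO 8601 strings compare lexicographically, so a bare date works as a
    # cutoff: "2019-08-01T12:00:00Z" < "2019-08-15" is True, while anything
    # published on or after the cutoff day compares greater.
    d = (datetime.utcnow() - timedelta(days=DAYS_TO_KEEP)).strftime("%Y-%m-%d")
    print("2019-08-01T12:00:00Z" < d)  # True once the cutoff passes that date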