From 06462ec8ffec26b9923213daf83ade0f4186ecf4 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Thu, 8 Aug 2019 22:54:33 +0200 Subject: [PATCH] Improve the GC --- app.py | 6 ++++-- core/gc.py | 39 +++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/app.py b/app.py index a0ae8ae..bb9d4da 100644 --- a/app.py +++ b/app.py @@ -629,10 +629,12 @@ def inbox(): logger.info(f"request_id={g.request_id} req_headers={request.headers!r}") logger.info(f"request_id={g.request_id} raw_data={data}") try: - if not verify_request( + req_verified, actor_id = verify_request( request.method, request.path, request.headers, request.data - ): + ) + if not req_verified: raise Exception("failed to verify request") + logger.info(f"request_id={g.request_id} signed by {actor_id}") except Exception: logger.exception( f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote" diff --git a/core/gc.py b/core/gc.py index 3763527..cabd956 100644 --- a/core/gc.py +++ b/core/gc.py @@ -7,6 +7,8 @@ from typing import Dict from typing import List from little_boxes import activitypub as ap +from little_boxes.errors import ActivityGoneError +from little_boxes.errors import RemoteServerUnavailableError from config import DAYS_TO_KEEP from config import ID @@ -58,6 +60,15 @@ def perform() -> None: # noqa: C901 toi = threads_of_interest() logger.info(f"thread_of_interest={toi!r}") + delete_deleted = DB.activities.delete_many( + { + "box": Box.INBOX.value, + "type": ap.ActivityType.DELETE.value, + "activity.published": {"$lt": d}, + } + ).deleted_count + logger.info(f"{delete_deleted} Delete deleted") + create_deleted = 0 create_count = 0 # Go over the old Create activities @@ -70,33 +81,41 @@ def perform() -> None: # noqa: C901 } ).limit(500): try: + logger.info(f"data={data!r}") create_count += 1 remote_id = data["remote_id"] meta = data["meta"] - activity = ap.parse_activity(data["activity"]) - logger.info(f"activity={activity!r}") # This activity has been bookmarked, keep it if meta.get("bookmarked"): _keep(data) continue - # Inspect the object - obj = activity.get_object() + obj = None + if not meta.get("deleted"): + try: + activity = ap.parse_activity(data["activity"]) + logger.info(f"activity={activity!r}") + obj = activity.get_object() + except (RemoteServerUnavailableError, ActivityGoneError): + logger.exception( + f"failed to load {remote_id}, this activity will be deleted" + ) # This activity mentions the server actor, keep it - if obj.has_mention(ID): + if obj and obj.has_mention(ID): _keep(data) continue # This activity is a direct reply of one the server actor activity, keep it - in_reply_to = obj.get_in_reply_to() - if in_reply_to and in_reply_to.startswith(ID): - _keep(data) - continue + if obj: + in_reply_to = obj.get_in_reply_to() + if in_reply_to and in_reply_to.startswith(ID): + _keep(data) + continue # This activity is part of a thread we want to keep, keep it - if in_reply_to and meta.get("thread_root_parent"): + if obj and in_reply_to and meta.get("thread_root_parent"): thread_root_parent = meta["thread_root_parent"] if thread_root_parent.startswith(ID) or thread_root_parent in toi: _keep(data)