From 6458d2a6c7a0a665b97e88cf10dfbd7fe37b7e56 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Thu, 30 Jun 2022 09:25:13 +0200 Subject: [PATCH] Improve caching --- app/httpsig.py | 33 +++++++++++++++--- app/main.py | 62 ++++++++++++++++++++++++++-------- app/templates.py | 10 ++++-- app/templates/admin_inbox.html | 3 ++ poetry.lock | 2 +- pyproject.toml | 1 + 6 files changed, 87 insertions(+), 24 deletions(-) diff --git a/app/httpsig.py b/app/httpsig.py index 270ddfb..713087a 100644 --- a/app/httpsig.py +++ b/app/httpsig.py @@ -8,22 +8,27 @@ import hashlib import typing from dataclasses import dataclass from datetime import datetime -from functools import lru_cache from typing import Any from typing import Dict from typing import Optional import fastapi import httpx +from cachetools import LFUCache from Crypto.Hash import SHA256 from Crypto.Signature import PKCS1_v1_5 from loguru import logger +from sqlalchemy import select from app import activitypub as ap from app import config +from app.database import AsyncSession +from app.database import get_db_session from app.key import Key from app.key import get_key +_KEY_CACHE = LFUCache(256) + def _build_signed_string( signed_headers: str, method: str, path: str, headers: Any, body_digest: str | None @@ -62,9 +67,25 @@ def _body_digest(body: bytes) -> str: return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8") -@lru_cache(32) -async def _get_public_key(key_id: str) -> Key: - # TODO: use DB to use cache actor +async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key: + if cached_key := _KEY_CACHE.get(key_id): + return cached_key + + # Check if the key belongs to an actor already in DB + from app import models + existing_actor = ( + await db_session.scalars( + select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0]) + ) + ).one_or_none() + if existing_actor and existing_actor.public_key_id == key_id: + k = Key(existing_actor.ap_id, key_id) + k.load_pub(existing_actor.public_key_as_pem) + logger.info(f"Found {key_id} on an existing actor") + _KEY_CACHE[key_id] = k + return k + + # Fetch it from app import activitypub as ap actor = await ap.fetch(key_id) @@ -82,6 +103,7 @@ async def _get_public_key(key_id: str) -> Key: f"failed to fetch requested key {key_id}: got {actor['publicKey']['id']}" ) + _KEY_CACHE[key_id] = k return k @@ -93,6 +115,7 @@ class HTTPSigInfo: async def httpsig_checker( request: fastapi.Request, + db_session: AsyncSession = fastapi.Depends(get_db_session), ) -> HTTPSigInfo: body = await request.body() @@ -111,7 +134,7 @@ async def httpsig_checker( ) try: - k = await _get_public_key(hsig["keyId"]) + k = await _get_public_key(db_session, hsig["keyId"]) except ap.ObjectIsGoneError: logger.info("Actor is gone") return HTTPSigInfo(has_valid_signature=False) diff --git a/app/main.py b/app/main.py index cc58571..930ef8d 100644 --- a/app/main.py +++ b/app/main.py @@ -8,6 +8,7 @@ from typing import Any from typing import Type import httpx +from cachetools import LFUCache from fastapi import Depends from fastapi import FastAPI from fastapi import Form @@ -56,6 +57,9 @@ from app.utils import pagination from app.utils.emoji import EMOJIS_BY_NAME from app.webfinger import get_remote_follow_template +_RESIZED_CACHE = LFUCache(32) + + # TODO(ts): # # Next: @@ -728,7 +732,7 @@ async def serve_proxy_media(request: Request, encoded_url: str) -> StreamingResp @app.get("/proxy/media/{encoded_url}/{size}") -def serve_proxy_media_resized( +async def serve_proxy_media_resized( request: Request, encoded_url: str, size: int, @@ -738,18 +742,38 @@ def serve_proxy_media_resized( # Decode the base64-encoded URL url = base64.urlsafe_b64decode(encoded_url).decode() + + is_cached = False + is_resized = False + if cached_resp := _RESIZED_CACHE.get((url, size)): + is_resized, resized_content, resized_mimetype, resp_headers = cached_resp + if is_resized: + return PlainTextResponse( + resized_content, + media_type=resized_mimetype, + headers=resp_headers, + ) + is_cached = True + # Request the URL (and filter request headers) - proxy_resp = httpx.get( - url, - headers=[ - (k, v) - for (k, v) in request.headers.raw - if k.lower() - not in [b"host", b"cookie", b"x-forwarded-for", b"x-real-ip", b"user-agent"] - ] - + [(b"user-agent", USER_AGENT.encode())], - ) - if proxy_resp.status_code != 200: + async with httpx.AsyncClient() as client: + proxy_resp = await client.get( + url, + headers=[ + (k, v) + for (k, v) in request.headers.raw + if k.lower() + not in [ + b"host", + b"cookie", + b"x-forwarded-for", + b"x-real-ip", + b"user-agent", + ] + ] + + [(b"user-agent", USER_AGENT.encode())], + ) + if proxy_resp.status_code != 200 or (is_cached and not is_resized): return PlainTextResponse( proxy_resp.content, status_code=proxy_resp.status_code, @@ -772,15 +796,23 @@ def serve_proxy_media_resized( try: out = BytesIO(proxy_resp.content) i = Image.open(out) - if i.is_animated: + if getattr(i, "is_animated", False): raise ValueError i.thumbnail((size, size)) resized_buf = BytesIO() i.save(resized_buf, format=i.format) resized_buf.seek(0) + resized_content = resized_buf.read() + resized_mimetype = i.get_format_mimetype() # type: ignore + _RESIZED_CACHE[(url, size)] = ( + True, + resized_content, + resized_mimetype, + proxy_resp_headers, + ) return PlainTextResponse( - resized_buf.read(), - media_type=i.get_format_mimetype(), # type: ignore + resized_content, + media_type=resized_mimetype, headers=proxy_resp_headers, ) except ValueError: diff --git a/app/templates.py b/app/templates.py index 280e0f6..129d70d 100644 --- a/app/templates.py +++ b/app/templates.py @@ -190,7 +190,8 @@ def _clean_html(html: str, note: Object) -> str: strip=True, ), note, - ) + ), + is_local=note.ap_id.startswith(BASE_URL), ) except Exception: raise @@ -241,12 +242,15 @@ def _html2text(content: str) -> str: return H2T.handle(content) -def _replace_emoji(u, data): +def _replace_emoji(u: str, _) -> str: filename = hex(ord(u))[2:] return config.EMOJI_TPL.format(filename=filename, raw=u) -def _emojify(text: str): +def _emojify(text: str, is_local: bool) -> str: + if not is_local: + return text + return emoji.replace_emoji( text, replace=_replace_emoji, diff --git a/app/templates/admin_inbox.html b/app/templates/admin_inbox.html index d808ca2..478e71a 100644 --- a/app/templates/admin_inbox.html +++ b/app/templates/admin_inbox.html @@ -16,7 +16,10 @@ {{ utils.display_actor(inbox_object.actor, actors_metadata) }} {% else %} +

Implement {{ inbox_object.ap_type }} + {{ inbox_object.ap_object }} +

{% endif %} {% endfor %} diff --git a/poetry.lock b/poetry.lock index d626970..5768f8d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1143,7 +1143,7 @@ dev = ["pytest (>=4.6.2)", "black (>=19.3b0)"] [metadata] lock-version = "1.1" python-versions = "^3.10" -content-hash = "19151bbc858317aec5747a8f45a86b47cc198111422cc166a94634ad1941d8bc" +content-hash = "91e35a13d21bb5fd3e8916aee95c0a8019bec3cf4f0c677bb86641f1d88dcfe3" [metadata.files] aiosqlite = [ diff --git a/pyproject.toml b/pyproject.toml index 9e43a3a..1f7be3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ emoji = "^1.7.0" PyLD = "^2.0.3" aiosqlite = "^0.17.0" sqlalchemy2-stubs = "^0.0.2-alpha.24" +cachetools = "^5.2.0" [tool.poetry.dev-dependencies] black = "^22.3.0"