From e8ee900c600480d4d8fcfcb8796e2796c83bd08d Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Thu, 5 Jul 2018 22:27:29 +0200 Subject: [PATCH] Cache attachments and actor icons Fixes #17 --- app.py | 42 ++++++++++++--- templates/layout.html | 2 +- templates/utils.html | 4 +- utils/img.py | 115 +++++++++++++++++++++++++++++++++++------- 4 files changed, 134 insertions(+), 29 deletions(-) diff --git a/app.py b/app.py index 16ed99b..b35e05d 100644 --- a/app.py +++ b/app.py @@ -37,10 +37,12 @@ from passlib.hash import bcrypt from u2flib_server import u2f from werkzeug.utils import secure_filename + import activitypub import config from activitypub import Box from activitypub import embed_collection +from config import USER_AGENT from config import ADMIN_API_KEY from config import BASE_URL from config import DB @@ -72,10 +74,12 @@ from little_boxes.httpsig import verify_request from little_boxes.webfinger import get_actor_url from little_boxes.webfinger import get_remote_follow_template from utils.img import ImageCache +from utils.img import Kind from utils.key import get_secret_key from utils.object_service import ObjectService +from typing import Optional -IMAGE_CACHE = ImageCache(GRIDFS) +IMAGE_CACHE = ImageCache(GRIDFS, USER_AGENT) OBJECT_SERVICE = ACTOR_SERVICE = ObjectService() @@ -185,28 +189,33 @@ def clean_html(html): return bleach.clean(html, tags=ALLOWED_TAGS) -_GRIDFS_CACHE: Dict[Tuple[str, int], str] = {} +_GRIDFS_CACHE: Dict[Tuple[Kind, str, Optional[int]], str] = {} -def _get_actor_icon_url(url, size): - k = (url, size) +def _get_file_url(url, size, kind): + k = (kind, url, size) cached = _GRIDFS_CACHE.get(k) if cached: return cached - doc = IMAGE_CACHE.fs.find_one({"url": url, "size": size}) + doc = IMAGE_CACHE.get_file(url, size, kind) if doc: u = f"/img/{str(doc._id)}" _GRIDFS_CACHE[k] = u return u - IMAGE_CACHE.cache_actor_icon(url) - return _get_actor_icon_url(url, size) + IMAGE_CACHE.cache(url, kind) + return _get_file_url(url, size, kind) @app.template_filter() def get_actor_icon_url(url, size): - return _get_actor_icon_url(url, size) + return _get_file_url(url, size, Kind.ACTOR_ICON) + + +@app.template_filter() +def get_attachment_url(url, size): + return _get_file_url(url, size, Kind.ATTACHMENT) @app.template_filter() @@ -543,6 +552,23 @@ def tmp_migrate2(): return "Done" +@app.route("/migration2") +@login_required +def tmp_migrate3(): + for activity in DB.activities.find(): + try: + activity = ap.parse_activity(activity["activity"]) + actor = activity.get_actor() + if actor.icon: + IMAGE_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON) + if activity.type == ActivityType.CREATE.value: + for attachment in activity.get_object()._data.get("attachment", []): + IMAGE_CACHE.cache(attachment["url"], Kind.ATTACHMENT) + except: + app.logger.exception('failed') + return "Done" + + @app.route("/") def index(): if is_api_request(): diff --git a/templates/layout.html b/templates/layout.html index d9bdfd0..b6c8d3e 100644 --- a/templates/layout.html +++ b/templates/layout.html @@ -4,7 +4,7 @@ -{% block title %}{{ config.NAME }}{% endblock %} - microblog.pub +{% block title %}{{ config.NAME }}{% endblock %}'s microblog diff --git a/templates/utils.html b/templates/utils.html index a00935a..0b158c3 100644 --- a/templates/utils.html +++ b/templates/utils.html @@ -48,9 +48,9 @@ {% endif %} {% for a in obj.attachment %} {% if a.url | is_img %} - + {% else %} -
  • {% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}
  • +
  • {% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}
  • {% endif %} {% endfor %} {% if obj.attachment | not_only_imgs %} diff --git a/utils/img.py b/utils/img.py index 2b08e58..dfe43d6 100644 --- a/utils/img.py +++ b/utils/img.py @@ -2,22 +2,20 @@ import base64 from gzip import GzipFile from io import BytesIO from typing import Any +import mimetypes +from enum import Enum import gridfs import requests from PIL import Image -def load(url): +def load(url, user_agent): """Initializes a `PIL.Image` from the URL.""" # TODO(tsileo): user agent - resp = requests.get(url, stream=True) - resp.raise_for_status() - try: - image = Image.open(BytesIO(resp.raw.read())) - finally: - resp.close() - return image + with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp: + resp.raise_for_status() + return Image.open(BytesIO(resp.raw.read())) def to_data_uri(img): @@ -28,25 +26,106 @@ def to_data_uri(img): return f"data:{img.get_format_mimetype()};base64,{data}" -class ImageCache(object): - def __init__(self, gridfs_db: str) -> None: - self.fs = gridfs.GridFS(gridfs_db) +class Kind(Enum): + ATTACHMENT = "attachment" + ACTOR_ICON = "actor_icon" - def cache_actor_icon(self, url: str): - if self.fs.find_one({"url": url}): + +class ImageCache(object): + def __init__(self, gridfs_db: str, user_agent: str) -> None: + self.fs = gridfs.GridFS(gridfs_db) + self.user_agent = user_agent + + def cache_attachment(self, url: str) -> None: + if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}): return - i = load(url) + if ( + url.endswith(".png") + or url.endswith(".jpg") + or url.endswith(".jpeg") + or url.endswith(".gif") + ): + i = load(url, self.user_agent) + # Save the original attachment (gzipped) + with BytesIO() as buf: + f1 = GzipFile(mode="wb", fileobj=buf) + i.save(f1, format=i.format) + f1.close() + buf.seek(0) + self.fs.put( + buf, + url=url, + size=None, + content_type=i.get_format_mimetype(), + kind=Kind.ATTACHMENT.value, + ) + # Save a thumbnail (gzipped) + i.thumbnail((720, 720)) + with BytesIO() as buf: + f1 = GzipFile(mode="wb", fileobj=buf) + i.save(f1, format=i.format) + f1.close() + buf.seek(0) + self.fs.put( + buf, + url=url, + size=720, + content_type=i.get_format_mimetype(), + kind=Kind.ATTACHMENT.value, + ) + return + + # The attachment is not an image, download and save it anyway + with requests.get( + url, stream=True, headers={"User-Agent": self.user_agent} + ) as resp: + resp.raise_for_status() + with BytesIO() as buf: + f1 = GzipFile(mode="wb", fileobj=buf) + for chunk in resp.iter_content(): + if chunk: + f1.write(chunk) + f1.close() + buf.seek(0) + self.fs.put( + buf, + url=url, + size=None, + content_type=mimetypes.guess_type(url)[0], + kind=Kind.ATTACHMENT.value, + ) + + def cache_actor_icon(self, url: str) -> None: + if self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}): + return + i = load(url, self.user_agent) for size in [50, 80]: t1 = i.copy() t1.thumbnail((size, size)) with BytesIO() as buf: - f1 = GzipFile(mode='wb', fileobj=buf) + f1 = GzipFile(mode="wb", fileobj=buf) t1.save(f1, format=i.format) f1.close() buf.seek(0) self.fs.put( - buf, url=url, size=size, content_type=i.get_format_mimetype() + buf, + url=url, + size=size, + content_type=i.get_format_mimetype(), + kind=Kind.ACTOR_ICON.value, ) - def get_file(self, url: str, size: int) -> Any: - return self.fs.find_one({"url": url, "size": size}) + def cache(self, url: str, kind: Kind) -> None: + if kind == Kind.ACTOR_ICON: + self.cache_actor_icon(url) + else: + self.cache_attachment(url) + + def get_actor_icon(self, url: str, size: int) -> Any: + return self._get_file(url, size, Kind.ACTOR_ICON) + + def get_attachment(self, url: str, size: int) -> Any: + return self._get_file(url, size, Kind.ATTACHMENT) + + def get_file(self, url: str, size: int, kind: Kind) -> Any: + return self.fs.find_one({"url": url, "size": size, "kind": kind.value})