Improve video caching

This commit is contained in:
Thomas Sileo 2019-08-15 16:08:52 +02:00
parent 6c83f5852b
commit 043e9a79dc
3 changed files with 52 additions and 21 deletions

View file

@ -39,6 +39,7 @@ from core.shared import p
from core.tasks import Tasks from core.tasks import Tasks
from utils import now from utils import now
from utils import opengraph from utils import opengraph
from utils.media import is_video
blueprint = flask.Blueprint("tasks", __name__) blueprint = flask.Blueprint("tasks", __name__)
@ -216,6 +217,25 @@ def task_finish_post_to_inbox() -> _Response:
return "" return ""
def select_video_to_cache(links):
    """Pick which video link to cache from a list of link entries.

    The 360p rendition is preferred. If there is none, the candidate with the
    smallest known height is used, and if no candidate declares a height the
    first candidate is returned.

    Args:
        links: iterable of link entries. An entry is normally a dict with an
            ``href`` and optional ``mediaType``/``mimeType`` and ``height``
            keys; bare URL strings are accepted too.

    Returns:
        A ``{"href": ..., "height": ...}`` dict (``height`` is ``None`` when
        the link did not declare one), or ``None`` when no video was found.
    """
    candidates = []
    for link in links:
        if isinstance(link, str):
            # A bare URL has no metadata: only its extension can qualify it.
            link = {"href": link}
        elif not isinstance(link, dict):
            continue

        href = link.get("href")
        if not href:
            continue

        # ActivityStreams names this property `mediaType`; `mimeType` is the
        # key this function has always checked, so both are honoured.
        declared = (link.get("mediaType") or "", link.get("mimeType") or "")
        if any(t.startswith("video/") for t in declared) or is_video(href):
            # `.get` so a video link without a height is kept, not a KeyError.
            candidates.append({"href": href, "height": link.get("height")})

    if not candidates:
        app.logger.warning(f"failed to select a video from {links!r}")
        return None

    for video in candidates:
        if video["height"] == 360:
            return video

    # Only compare heights that are actual numbers; unknown ones cannot be
    # ordered against them.
    sized = [v for v in candidates if isinstance(v["height"], (int, float))]
    if sized:
        return min(sized, key=lambda v: v["height"])
    return candidates[0]
@blueprint.route("/task/cache_attachments", methods=["POST"]) @blueprint.route("/task/cache_attachments", methods=["POST"])
def task_cache_attachments() -> _Response: def task_cache_attachments() -> _Response:
task = p.parse(flask.request) task = p.parse(flask.request)
@ -230,10 +250,10 @@ def task_cache_attachments() -> _Response:
if obj.has_type(ap.ActivityType.VIDEO): if obj.has_type(ap.ActivityType.VIDEO):
if isinstance(obj.url, list): if isinstance(obj.url, list):
for link in obj.url: # TODO: filter only videogt
if link.get("mimeType", "").startswith("video/"): link = select_video_to_cache(obj.url)
config.MEDIA_CACHE.cache_attachment({"url": link["href"]}, iri) if link:
break config.MEDIA_CACHE.cache_attachment({"url": link["href"]}, iri)
elif isinstance(obj.url, str): elif isinstance(obj.url, str):
config.MEDIA_CACHE.cache_attachment({"url": obj.url}, iri) config.MEDIA_CACHE.cache_attachment({"url": obj.url}, iri)
else: else:
@ -290,9 +310,13 @@ def task_cache_actor() -> _Response:
if not activity.has_type([ap.ActivityType.CREATE, ap.ActivityType.ANNOUNCE]): if not activity.has_type([ap.ActivityType.CREATE, ap.ActivityType.ANNOUNCE]):
return "" return ""
if activity.get_object()._data.get( if (
"attachment", [] activity.has_type(ap.ActivityType.CREATE)
) or activity.get_object().has_type(ap.ActivityType.VIDEO): and activity.get_object()._data.get("attachment", [])
) or (
activity.has_type(ap.ActivityType.ANNOUNCE)
and activity.get_object().has_type(ap.ActivityType.VIDEO)
):
Tasks.cache_attachments(iri) Tasks.cache_attachments(iri)
except (ActivityGoneError, ActivityNotFoundError): except (ActivityGoneError, ActivityNotFoundError):

View file

@ -2,6 +2,7 @@ import base64
import mimetypes import mimetypes
from enum import Enum from enum import Enum
from enum import unique from enum import unique
from functools import lru_cache
from gzip import GzipFile from gzip import GzipFile
from io import BytesIO from io import BytesIO
from typing import Any from typing import Any
@ -14,6 +15,22 @@ from little_boxes import activitypub as ap
from PIL import Image from PIL import Image
@lru_cache(2048)
def _is_img(filename):
    """Return True when *filename* (a path or URL) looks like an image.

    The decision is based on the MIME type guessed from the lowercased name.
    Results are memoized per filename.
    """
    from urllib.parse import urlsplit

    name = filename.lower()
    mimetype, _ = mimetypes.guess_type(name)
    if mimetype is None:
        # Depending on the Python version, guess_type may treat a trailing
        # "?query" or "#fragment" as part of the extension, so retry once
        # with both removed.
        try:
            stripped = urlsplit(name)._replace(query="", fragment="").geturl()
        except ValueError:
            # Malformed URL: nothing more to try.
            stripped = name
        if stripped != name:
            mimetype, _ = mimetypes.guess_type(stripped)
    return bool(mimetype) and mimetype.split("/")[0] == "image"
@lru_cache(2048)
def is_video(filename):
    """Return True when *filename* (a path or URL) looks like a video.

    The decision is based on the MIME type guessed from the lowercased name.
    Results are memoized per filename.
    """
    from urllib.parse import urlsplit

    name = filename.lower()
    mimetype, _ = mimetypes.guess_type(name)
    if mimetype is None:
        # Depending on the Python version, guess_type may treat a trailing
        # "?query" or "#fragment" as part of the extension, so retry once
        # with both removed.
        try:
            stripped = urlsplit(name)._replace(query="", fragment="").geturl()
        except ValueError:
            # Malformed URL: nothing more to try.
            stripped = name
        if stripped != name:
            mimetype, _ = mimetypes.guess_type(stripped)
    return bool(mimetype) and mimetype.split("/")[0] == "video"
def load(url: str, user_agent: str) -> Image: def load(url: str, user_agent: str) -> Image:
"""Initializes a `PIL.Image` from the URL.""" """Initializes a `PIL.Image` from the URL."""
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp: with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
@ -74,10 +91,7 @@ class MediaCache(object):
# If it's an image, make some thumbnails # If it's an image, make some thumbnails
if ( if (
url.endswith(".png") _is_img(url)
or url.endswith(".jpg")
or url.endswith(".jpeg")
or url.endswith(".gif")
or attachment.get("mediaType", "").startswith("image/") or attachment.get("mediaType", "").startswith("image/")
or ap._has_type(attachment.get("type"), ap.ActivityType.IMAGE) or ap._has_type(attachment.get("type"), ap.ActivityType.IMAGE)
): ):
@ -123,8 +137,9 @@ class MediaCache(object):
resp.raise_for_status() resp.raise_for_status()
with BytesIO() as buf: with BytesIO() as buf:
with GzipFile(mode="wb", fileobj=buf) as f1: with GzipFile(mode="wb", fileobj=buf) as f1:
for chunk in resp.iter_content(): for chunk in resp.iter_content(chunk_size=2 << 20):
if chunk: if chunk:
print(len(chunk))
f1.write(chunk) f1.write(chunk)
buf.seek(0) buf.seek(0)
self.fs.put( self.fs.put(

View file

@ -1,5 +1,4 @@
import logging import logging
import mimetypes
import urllib import urllib
from datetime import datetime from datetime import datetime
from datetime import timezone from datetime import timezone
@ -22,6 +21,7 @@ from config import MEDIA_CACHE
from core.activitypub import _answer_key from core.activitypub import _answer_key
from utils import parse_datetime from utils import parse_datetime
from utils.media import Kind from utils.media import Kind
from utils.media import _is_img
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
@ -312,14 +312,6 @@ def has_actor_type(doc):
return False return False
@lru_cache(512)
def _is_img(filename):
    """Return True when *filename* (a path or URL) looks like an image.

    The decision is based on the MIME type guessed from the lowercased name.
    Results are memoized per filename.
    """
    from urllib.parse import urlsplit

    name = filename.lower()
    mimetype, _ = mimetypes.guess_type(name)
    if mimetype is None:
        # Depending on the Python version, guess_type may treat a trailing
        # "?query" or "#fragment" as part of the extension, so retry once
        # with both removed.
        try:
            stripped = urlsplit(name)._replace(query="", fragment="").geturl()
        except ValueError:
            # Malformed URL: nothing more to try.
            stripped = name
        if stripped != name:
            mimetype, _ = mimetypes.guess_type(stripped)
    return bool(mimetype) and mimetype.split("/")[0] == "image"
@filters.app_template_filter() @filters.app_template_filter()
def not_only_imgs(attachment): def not_only_imgs(attachment):
for a in attachment: for a in attachment: