import base64
import mimetypes
from enum import Enum
from enum import unique
from functools import lru_cache
from gzip import GzipFile
from io import BytesIO
from shutil import copyfileobj
from typing import Any
from typing import Dict
from typing import Optional
from typing import Tuple

import gridfs
import piexif
import requests
from little_boxes import activitypub as ap
from PIL import Image


@lru_cache(2048)
def _is_img(filename):
    mimetype, _ = mimetypes.guess_type(filename.lower())
    if mimetype and mimetype.split("/")[0] in ["image"]:
        return True
    return False


@lru_cache(2048)
def is_video(filename):
    mimetype, _ = mimetypes.guess_type(filename.lower())
    if mimetype and mimetype.split("/")[0] in ["video"]:
        return True
    return False
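
# Illustrative behaviour (based purely on mimetypes.guess_type, not exhaustive):
# _is_img("photo.JPG") and _is_img("pic.png") return True, is_video("clip.mp4")
# returns True, and both return False for e.g. "notes.txt". Results are memoized
# per filename via lru_cache.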


def _load(url: str, user_agent: str) -> Tuple[BytesIO, Optional[str]]:
    """Downloads the file at the URL into a `BytesIO`, returning it with the response content type."""
    out = BytesIO()
    with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
        resp.raise_for_status()

        resp.raw.decode_content = True
        while 1:
            buf = resp.raw.read()
            if not buf:
                break
            out.write(buf)
    out.seek(0)
    return out, resp.headers.get("content-type")


def load(url: str, user_agent: str) -> Image.Image:
    """Initializes a `PIL.Image` from the URL."""
    out, _ = _load(url, user_agent)
    return Image.open(out)


def to_data_uri(img: Image.Image) -> str:
    out = BytesIO()
    img.save(out, format=img.format)
    out.seek(0)
    data = base64.b64encode(out.read()).decode("utf-8")
    return f"data:{img.get_format_mimetype()};base64,{data}"
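
# Example (hypothetical values): for a small PNG, to_data_uri() returns a string of
# the form "data:image/png;base64,iVBORw0KGgo...", suitable for embedding directly
# in an <img src="..."> attribute instead of a remote URL.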


@unique
class Kind(Enum):
    ATTACHMENT = "attachment"
    ACTOR_ICON = "actor_icon"
    UPLOAD = "upload"
    OG_IMAGE = "og"
    EMOJI = "emoji"


class MediaCache(object):
    def __init__(self, gridfs_db: str, user_agent: str) -> None:
        self.fs = gridfs.GridFS(gridfs_db)
        self.user_agent = user_agent
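
    # Minimal usage sketch (the names below are illustrative, not part of this
    # module). Note that despite the `str` annotation, gridfs.GridFS() expects a
    # PyMongo Database handle:
    #
    #     db = pymongo.MongoClient()["microblogpub"]
    #     cache = MediaCache(db, "my-user-agent/1.0")
    #     cache.cache_actor_icon("https://example.com/avatar.png")
    #     icon = cache.get_actor_icon("https://example.com/avatar.png", 50)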

    def cache_og_image(self, url: str, remote_id: str) -> None:
        if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
            return
        i = load(url, self.user_agent)
        # Save a 100px thumbnail of the OG image (gzipped)
        i.thumbnail((100, 100))
        with BytesIO() as buf:
            with GzipFile(mode="wb", fileobj=buf) as f1:
                i.save(f1, format=i.format)
            buf.seek(0)
            self.fs.put(
                buf,
                url=url,
                size=100,
                content_type=i.get_format_mimetype(),
                kind=Kind.OG_IMAGE.value,
                remote_id=remote_id,
            )
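
    # Note: only the 100px preview of the Open Graph image is kept (stored gzipped
    # with size=100), so it can later be looked up with get_file(url, 100,
    # Kind.OG_IMAGE).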

    def cache_attachment(self, attachment: Dict[str, Any], remote_id: str) -> None:
        url = attachment["url"]

        # Ensure it's not already there
        if self.fs.find_one(
            {"url": url, "kind": Kind.ATTACHMENT.value, "remote_id": remote_id}
        ):
            return

        # If it's an image, make some thumbnails
        if (
            _is_img(url)
            or attachment.get("mediaType", "").startswith("image/")
            or ap._has_type(attachment.get("type"), ap.ActivityType.IMAGE)
        ):
            try:
                i = load(url, self.user_agent)
                # Save the original attachment (gzipped)
                with BytesIO() as buf:
                    f1 = GzipFile(mode="wb", fileobj=buf)
                    i.save(f1, format=i.format)
                    f1.close()
                    buf.seek(0)
                    self.fs.put(
                        buf,
                        url=url,
                        size=None,
                        content_type=i.get_format_mimetype(),
                        kind=Kind.ATTACHMENT.value,
                        remote_id=remote_id,
                    )
                # Save a thumbnail (gzipped)
                i.thumbnail((720, 720))
                with BytesIO() as buf:
                    with GzipFile(mode="wb", fileobj=buf) as f1:
                        i.save(f1, format=i.format)
                    buf.seek(0)
                    self.fs.put(
                        buf,
                        url=url,
                        size=720,
                        content_type=i.get_format_mimetype(),
                        kind=Kind.ATTACHMENT.value,
                        remote_id=remote_id,
                    )
                return
            except Exception:
                # FIXME(tsileo): logging
                pass

        # The attachment is not an image, download and save it anyway
        with requests.get(
            url, stream=True, headers={"User-Agent": self.user_agent}
        ) as resp:
            resp.raise_for_status()
            with BytesIO() as buf:
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    for chunk in resp.iter_content(chunk_size=2 << 20):
                        if chunk:
                            f1.write(chunk)
                buf.seek(0)
                self.fs.put(
                    buf,
                    url=url,
                    size=None,
                    content_type=mimetypes.guess_type(url)[0],
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )
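
    # For images, the gzipped original (size=None) and the 720px thumbnail
    # (size=720) share the same URL and are distinguished only by the `size`
    # metadata, which get_attachment(url, size) keys on; non-image attachments are
    # stored once with size=None.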

    def is_actor_icon_cached(self, url: str) -> bool:
        return bool(self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}))

    def cache_actor_icon(self, url: str) -> None:
        if self.is_actor_icon_cached(url):
            return
        i = load(url, self.user_agent)
        for size in [50, 80]:
            t1 = i.copy()
            t1.thumbnail((size, size))
            with BytesIO() as buf:
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    t1.save(f1, format=i.format)
                buf.seek(0)
                self.fs.put(
                    buf,
                    url=url,
                    size=size,
                    content_type=i.get_format_mimetype(),
                    kind=Kind.ACTOR_ICON.value,
                )

    def is_emoji_cached(self, url: str) -> bool:
        return bool(self.fs.find_one({"url": url, "kind": Kind.EMOJI.value}))

    def cache_emoji(self, url: str, iri: str) -> None:
        if self.is_emoji_cached(url):
            return
        src, content_type = _load(url, self.user_agent)
        with BytesIO() as buf:
            with GzipFile(mode="wb", fileobj=buf) as g:
                copyfileobj(src, g)
            buf.seek(0)
            self.fs.put(
                buf,
                url=url,
                remote_id=iri,
                size=None,
                content_type=content_type or mimetypes.guess_type(url)[0],
                kind=Kind.EMOJI.value,
            )

    def save_upload(self, obuf: BytesIO, filename: str) -> str:
        # Remove EXIF metadata
        if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
            obuf.seek(0)
            with BytesIO() as buf2:
                piexif.remove(obuf.getvalue(), buf2)
                obuf.truncate(0)
                obuf.write(buf2.getvalue())

        obuf.seek(0)
        mtype = mimetypes.guess_type(filename)[0]
        with BytesIO() as gbuf:
            with GzipFile(mode="wb", fileobj=gbuf) as gzipfile:
                gzipfile.write(obuf.getvalue())

            gbuf.seek(0)
            oid = self.fs.put(
                gbuf,
                content_type=mtype,
                upload_filename=filename,
                kind=Kind.UPLOAD.value,
            )
        return str(oid)

    def get_actor_icon(self, url: str, size: int) -> Any:
        return self.get_file(url, size, Kind.ACTOR_ICON)

    def get_attachment(self, url: str, size: int) -> Any:
        return self.get_file(url, size, Kind.ATTACHMENT)

    def get_file(self, url: str, size: int, kind: Kind) -> Any:
        return self.fs.find_one({"url": url, "size": size, "kind": kind.value})
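
# Reading a cached entry back (sketch; the serving code lives outside this module).
# GridFS.find_one() returns a file-like GridOut or None, and every entry in this
# cache is stored gzip-compressed, so a caller would do something like:
#
#     grid_out = cache.get_actor_icon(url, 50)
#     if grid_out:
#         body = GzipFile(mode="rb", fileobj=grid_out).read()
#         content_type = grid_out.content_type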