Mirror of https://git.sr.ht/~tsileo/microblog.pub
Synced 2024-11-15 03:04:28 +00:00

Commit 2180a79cf2 (parent 733d6c0ca5)

    Disable old cleanup and start working on GC, clean media cache

4 changed files with 123 additions and 257 deletions
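Summary: the old three-part cleanup task chain in app.py is disabled (the dispatching push is commented out and the /task/cleanup_part_1 route decorator is removed), a new gc.py module carries the replacement garbage collector, and MediaCache drops its legacy single-argument methods so that every cached OG image and attachment is tagged with the remote_id of the owning activity. That remote_id tag is what lets the GC clean the media cache per activity.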
app.py (208 changed lines)
@@ -19,6 +19,7 @@ from urllib.parse import urlparse
 import bleach
 import emoji_unicode
+import html2text
 import mf2py
 import requests
 import timeago
@@ -34,7 +35,6 @@ from flask import request
 from flask import session
 from flask import url_for
 from flask_wtf.csrf import CSRFProtect
-import html2text
 from itsdangerous import BadSignature
 from little_boxes import activitypub as ap
 from little_boxes.activitypub import ActivityType
@@ -2675,14 +2675,14 @@ def task_fetch_og_meta():
             for og in og_metadata:
                 if not og.get("image"):
                     continue
-                MEDIA_CACHE.cache_og_image2(og["image"], iri)
+                MEDIA_CACHE.cache_og_image(og["image"], iri)

             app.logger.debug(f"OG metadata {og_metadata!r}")
             DB.activities.update_one(
                 {"remote_id": iri}, {"$set": {"meta.og_metadata": og_metadata}}
             )

-        app.logger.info(f"OG metadata fetched for {iri}")
+        app.logger.info(f"OG metadata fetched for {iri}: {og_metadata}")
     except (ActivityGoneError, ActivityNotFoundError):
         app.logger.exception(f"dropping activity {iri}, skip OG metedata")
         return ""
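With cached OG images now keyed by remote_id (see the MediaCache changes below), dropping an activity's cached media becomes a single GridFS query. This is exactly the loop the new gc.py uses:

    # Delete every cached media item tied to one activity IRI (as done in gc.py).
    for grid_item in MEDIA_CACHE.fs.find({"remote_id": iri}):
        MEDIA_CACHE.fs.delete(grid_item._id)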
@@ -2893,33 +2893,12 @@ def task_cache_attachments():
         app.logger.info(f"activity={activity!r}")
         # Generates thumbnails for the actor's icon and the attachments if any

-        actor = activity.get_actor()
+        obj = activity.get_object()

-        # Update the cached actor
-        DB.actors.update_one(
-            {"remote_id": iri},
-            {"$set": {"remote_id": iri, "data": actor.to_dict(embed=True)}},
-            upsert=True,
-        )
-
-        if actor.icon:
-            MEDIA_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
-
-        obj = None
-        if activity.has_type(ap.ActivityType.CREATE):
-            # This means a `Create` triggered the task
-            obj = activity.get_object()
-        elif activity.has_type(ap.CREATE_TYPES):
-            # This means a `Announce` triggered the task
-            obj = activity
-        else:
-            app.logger.warning(f"Don't know what to do with {activity!r}")
-            return
-
         # Iter the attachments
         for attachment in obj._data.get("attachment", []):
             try:
-                MEDIA_CACHE.cache_attachment2(attachment, iri)
+                MEDIA_CACHE.cache_attachment(attachment, iri)
             except ValueError:
                 app.logger.exception(f"failed to cache {attachment}")
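For reference, the attachment dict handed to the renamed cache_attachment comes straight from the activity's JSON. A hedged sketch of a typical call (the field values are illustrative, not taken from the commit):

    # Hypothetical ActivityPub attachment as found in obj._data["attachment"].
    attachment = {
        "type": "Document",
        "mediaType": "image/jpeg",
        "url": "https://remote.example/media/photo.jpg",  # illustrative URL
    }
    MEDIA_CACHE.cache_attachment(attachment, iri)  # now also records the activity IRI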
@@ -2938,22 +2917,28 @@ def task_cache_attachments():
 def task_cache_actor() -> str:
     task = p.parse(request)
     app.logger.info(f"task={task!r}")
-    iri, also_cache_attachments = (
-        task.payload["iri"],
-        task.payload.get("also_cache_attachments", True),
-    )
+    iri = task.payload["iri"]
     try:
         activity = ap.fetch_remote_activity(iri)
         app.logger.info(f"activity={activity!r}")

-        # FIXME(tsileo): OG meta for Announce?
+        # Fetch the Open Grah metadata if it's a `Create`
         if activity.has_type(ap.ActivityType.CREATE):
             Tasks.fetch_og_meta(iri)

-        if activity.has_type([ap.ActivityType.LIKE, ap.ActivityType.ANNOUNCE]):
+        # Cache the object if it's a `Like` or an `Announce` unrelated to the server outbox (because it will never get
+        # displayed)
+        if activity.has_type(
+            [ap.ActivityType.LIKE, ap.ActivityType.ANNOUNCE]
+        ) and not activity.get_object_id().startswith(BASE_URL):
             Tasks.cache_object(iri)

         actor = activity.get_actor()
+        if actor.icon:
+            if isinstance(actor.icon, dict) and "url" in actor.icon:
+                MEDIA_CACHE.cache_actor_icon(actor.icon["url"])
+            else:
+                app.logger.warning(f"failed to parse icon {actor.icon} for {iri}")

         if activity.has_type(ap.ActivityType.FOLLOW):
             if actor.id == ID:
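The new isinstance guard exists because actor icon values in the wild are not always a dict: ActivityStreams also allows a bare URI string or a list of images. A hedged sketch of a more permissive parser (an assumption, not part of this commit):

    # Assumption: normalizing the icon shapes ActivityStreams permits.
    icon = actor.icon
    if isinstance(icon, list) and icon:
        icon = icon[0]  # take the first image
    if isinstance(icon, str):
        icon = {"url": icon}  # bare URI
    if isinstance(icon, dict) and "url" in icon:
        MEDIA_CACHE.cache_actor_icon(icon["url"])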
@@ -2973,12 +2958,8 @@ def task_cache_actor() -> str:
             )

         app.logger.info(f"actor cached for {iri}")
-        if also_cache_attachments and activity.has_type(ap.ActivityType.CREATE):
+        if activity.has_type([ap.ActivityType.CREATE, ap.ActivityType.ANNOUNCE]):
             Tasks.cache_attachments(iri)
-        elif also_cache_attachments and activity.has_type(ap.ActivityType.ANNOUNCE):
-            obj = activity.get_object()
-            Tasks.cache_attachments(obj.id)
-            Tasks.cache_actor(obj.id)

     except (ActivityGoneError, ActivityNotFoundError):
         DB.activities.update_one({"remote_id": iri}, {"$set": {"meta.deleted": True}})
@@ -3291,129 +3272,13 @@ def task_update_question():
 def task_cleanup():
     task = p.parse(request)
     app.logger.info(f"task={task!r}")
-    p.push({}, "/task/cleanup_part_1")
+    # p.push({}, "/task/cleanup_part_1")
     return ""


-@app.route("/task/cleanup_part_1", methods=["POST"])
 def task_cleanup_part_1():
     task = p.parse(request)
     app.logger.info(f"task={task!r}")
-    d = (datetime.utcnow() - timedelta(days=15)).strftime("%Y-%m-%d")
-
-    # (We keep Follow and Accept forever)
-
-    # Announce and Like cleanup
-    for ap_type in [ActivityType.ANNOUNCE, ActivityType.LIKE]:
-        # Migrate old (before meta.keep activities on the fly)
-        DB.activities.update_many(
-            {
-                "box": Box.INBOX.value,
-                "type": ap_type.value,
-                "meta.keep": {"$exists": False},
-                "activity.object": {"$regex": f"^{BASE_URL}"},
-            },
-            {"$set": {"meta.keep": True}},
-        )
-
-        DB.activities.update_many(
-            {
-                "box": Box.INBOX.value,
-                "type": ap_type.value,
-                "meta.keep": {"$exists": False},
-                "activity.object.id": {"$regex": f"^{BASE_URL}"},
-            },
-            {"$set": {"meta.keep": True}},
-        )
-
-        DB.activities.update_many(
-            {
-                "box": Box.INBOX.value,
-                "type": ap_type.value,
-                "meta.keep": {"$exists": False},
-            },
-            {"$set": {"meta.keep": False}},
-        )
-        # End of the migration
-
-        # Delete old activities
-        DB.activities.delete_many(
-            {
-                "box": Box.INBOX.value,
-                "type": ap_type.value,
-                "meta.keep": False,
-                "activity.published": {"$lt": d},
-            }
-        )
-
-        # And delete the soft-deleted one
-        DB.activities.delete_many(
-            {
-                "box": Box.INBOX.value,
-                "type": ap_type.value,
-                "meta.keep": False,
-                "meta.deleted": True,
-            }
-        )
-
-    # Create cleanup (more complicated)
-    # The one that mention our actor
-    DB.activities.update_many(
-        {
-            "box": Box.INBOX.value,
-            "meta.keep": {"$exists": False},
-            "activity.object.tag.href": {"$regex": f"^{BASE_URL}"},
-        },
-        {"$set": {"meta.keep": True}},
-    )
-    DB.activities.update_many(
-        {
-            "box": Box.REPLIES.value,
-            "meta.keep": {"$exists": False},
-            "activity.tag.href": {"$regex": f"^{BASE_URL}"},
-        },
-        {"$set": {"meta.keep": True}},
-    )
-
-    # The replies of the outbox
-    DB.activities.update_many(
-        {"meta.thread_root_parent": {"$regex": f"^{BASE_URL}"}},
-        {"$set": {"meta.keep": True}},
-    )
-    # Track all the threads we participated
-    keep_threads = []
-    for data in DB.activities.find(
-        {
-            "box": Box.OUTBOX.value,
-            "type": ActivityType.CREATE.value,
-            "meta.thread_root_parent": {"$exists": True},
-        }
-    ):
-        keep_threads.append(data["meta"]["thread_root_parent"])
-
-    for root_parent in set(keep_threads):
-        DB.activities.update_many(
-            {"meta.thread_root_parent": root_parent}, {"$set": {"meta.keep": True}}
-        )
-
-    DB.activities.update_many(
-        {
-            "box": {"$in": [Box.REPLIES.value, Box.INBOX.value]},
-            "meta.keep": {"$exists": False},
-        },
-        {"$set": {"meta.keep": False}},
-    )
-
-    DB.activities.update_many(
-        {
-            "box": Box.OUTBOX.value,
-            "type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
-            "meta.public": {"$exists": False},
-        },
-        {"$set": {"meta.public": True}},
-    )
-
-    p.push({}, "/task/cleanup_part_2")
     return "OK"
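Note that task_cleanup_part_1 keeps its body but is now unreachable: its route decorator is gone and the only push to /task/cleanup_part_1 is commented out, so the whole meta.keep migration and deletion pass is effectively retired in favor of the new gc.py below.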
@@ -3421,25 +3286,6 @@ def task_cleanup_part_1():
 def task_cleanup_part_2():
     task = p.parse(request)
     app.logger.info(f"task={task!r}")
-    d = (datetime.utcnow() - timedelta(days=15)).strftime("%Y-%m-%d")
-
-    # Go over the old Create activities
-    for data in DB.activities.find(
-        {
-            "box": Box.INBOX.value,
-            "type": ActivityType.CREATE.value,
-            "meta.keep": False,
-            "activity.published": {"$lt": d},
-        }
-    ).limit(5000):
-        # Delete the cached attachment/
-        for grid_item in MEDIA_CACHE.fs.find({"remote_id": data["remote_id"]}):
-            MEDIA_CACHE.fs.delete(grid_item._id)
-        DB.activities.delete_one({"_id": data["_id"]})
-
-    # FIXME(tsileo): cleanup cache from announces object
-
-    p.push({}, "/task/cleanup_part_3")
     return "OK"
@@ -3447,20 +3293,4 @@ def task_cleanup_part_2():
 def task_cleanup_part_3():
     task = p.parse(request)
     app.logger.info(f"task={task!r}")
-
-    d = (datetime.utcnow() - timedelta(days=15)).strftime("%Y-%m-%d")
-
-    # Delete old replies we don't care about
-    DB.activities.delete_many(
-        {"box": Box.REPLIES.value, "meta.keep": False, "activity.published": {"$lt": d}}
-    )
-
-    # Remove all the attachments no tied to a remote_id (post celery migration)
-    for grid_item in MEDIA_CACHE.fs.find(
-        {"kind": {"$in": ["og", "attachment"]}, "remote_id": {"$exists": False}}
-    ):
-        MEDIA_CACHE.fs.delete(grid_item._id)
-
-    # TODO(tsileo): iterator over "actor_icon" and look for unused one in a separate task
-
     return "OK"
gc.py (new file, 100 lines)
@@ -0,0 +1,100 @@
+import logging
+from datetime import datetime
+from datetime import timedelta
+from typing import Any
+from typing import Dict
+from typing import List
+from urllib.parse import urlparse
+
+from little_boxes import activitypub as ap
+
+import activitypub
+from activitypub import Box
+from config import ID
+from config import ME
+from config import MEDIA_CACHE
+from utils.migrations import DB
+from utils.migrations import Migration
+from utils.migrations import logger
+
+back = activitypub.MicroblogPubBackend()
+ap.use_backend(back)
+
+MY_PERSON = ap.Person(**ME)
+
+logger = logging.getLogger(__name__)
+
+
+def threads_of_interest() -> List[str]:
+    out = set()
+
+    # Fetch all the threads we've participed in
+    for data in DB.activities.find(
+        {
+            "meta.thread_root_parent": {"$exists": True},
+            "box": Box.OUTBOX.value,
+            "type": ap.ActivityType.CREATE.value,
+        }
+    ):
+        out.add(data["meta"]["thread_root_parent"])
+
+    # Fetch all threads related to bookmarked activities
+    for data in DB.activities.find({"meta.bookmarked": True}):
+        # Keep the replies
+        out.add(data["meta"]["object_id"])
+        # And the whole thread if any
+        if "thread_root_parent" in data["meta"]:
+            out.add(data["meta"]["thread_root_parent"])
+
+    return list(out)
+
+
+def perform() -> None:
+    d = (datetime.utcnow() - timedelta(days=2)).strftime("%Y-%m-%d")
+    toi = threads_of_interest()
+
+    # Go over the old Create activities
+    for data in DB.activities.find(
+        {
+            "box": Box.INBOX.value,
+            "type": ap.ActivityType.CREATE.value,
+            "activity.published": {"$lt": d},
+        }
+    ).limit(1000):
+        remote_id = data["remote_id"]
+        meta = data["meta"]
+        activity = ap.parse_activity(data["activity"])
+        logger.info(f"{activity}")
+
+        # This activity has been bookmarked, keep it
+        if meta.get("bookmarked"):
+            continue
+
+        # Inspect the object
+        obj = activity.get_object()
+
+        # This activity mentions the server actor, keep it
+        if obj.has_mention(ID):
+            continue
+
+        # This activity is a direct reply of one the server actor activity, keep it
+        in_reply_to = obj.get_in_reply_to()
+        if in_reply_to and in_reply_to.startswith(ID):
+            continue
+
+        # This activity is part of a thread we want to keep, keep it
+        if in_reply_to and meta.get("thread_root_parent"):
+            thread_root_parent = meta["thread_root_parent"]
+            if thread_root_parent.startswith(ID) or thread_root_parent in toi:
+                continue
+
+        # This activity was boosted or liked, keep it
+        if meta.get("boosted") or meta.get("liked"):
+            continue
+
+        # Delete the cached attachment
+        for grid_item in MEDIA_CACHE.fs.find({"remote_id": remote_id}):
+            MEDIA_CACHE.fs.delete(grid_item._id)
+
+        # Delete the activity
+        DB.activities.delete_one({"_id": data["_id"]})
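The commit does not yet wire perform() to a task or cron entry. A hedged sketch of invoking it by hand (an assumption: run from the repo root so activitypub, config and utils resolve; the module is loaded by path because plain `import gc` would pick up the interpreter's built-in gc module instead of this file):

    # Assumption: one-off manual GC run, loading gc.py by file path.
    import importlib.util

    spec = importlib.util.spec_from_file_location("microblog_gc", "gc.py")
    microblog_gc = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(microblog_gc)

    microblog_gc.perform()  # one pass over inbox Create activities older than 2 days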
templates/note.html

@@ -1,6 +1,6 @@
 {% extends "layout.html" %}
 {% import 'utils.html' as utils %}
-{% block title %}{{ config.NAME }}: {{ note.activity.object.content | html2plaintext | truncate(50) }}{% endblock %}
+{% block title %}{{ config.NAME }}: "{{ note.activity.object.content | html2plaintext | truncate(50) }}"{% endblock %}
 {% block header %}
 <meta content="article" property="og:type" />
 <meta content="{{ note.activity.object.url }}" property="og:url" />
utils/media.py

@@ -42,25 +42,7 @@ class MediaCache(object):
         self.fs = gridfs.GridFS(gridfs_db)
         self.user_agent = user_agent

-    def cache_og_image(self, url: str) -> None:
-        if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
-            return
-        i = load(url, self.user_agent)
-        # Save the original attachment (gzipped)
-        i.thumbnail((100, 100))
-        with BytesIO() as buf:
-            with GzipFile(mode="wb", fileobj=buf) as f1:
-                i.save(f1, format=i.format)
-            buf.seek(0)
-            self.fs.put(
-                buf,
-                url=url,
-                size=100,
-                content_type=i.get_format_mimetype(),
-                kind=Kind.OG_IMAGE.value,
-            )
-
-    def cache_og_image2(self, url: str, remote_id: str) -> None:
+    def cache_og_image(self, url: str, remote_id: str) -> None:
         if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
             return
         i = load(url, self.user_agent)
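The old thumbnail-only cache_og_image is deleted and cache_og_image2 takes over its name, so the surviving implementation records the owning activity. The updated call shape, as already used by task_fetch_og_meta in app.py above:

    MEDIA_CACHE.cache_og_image(og["image"], iri)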
@@ -79,49 +61,11 @@ class MediaCache(object):
             remote_id=remote_id,
         )

-    def cache_attachment(self, url: str) -> None:
-        if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
-            return
-        if (
-            url.endswith(".png")
-            or url.endswith(".jpg")
-            or url.endswith(".jpeg")
-            or url.endswith(".gif")
-        ):
-            i = load(url, self.user_agent)
-            # Save the original attachment (gzipped)
-            with BytesIO() as buf:
-                f1 = GzipFile(mode="wb", fileobj=buf)
-                i.save(f1, format=i.format)
-                f1.close()
-                buf.seek(0)
-                self.fs.put(
-                    buf,
-                    url=url,
-                    size=None,
-                    content_type=i.get_format_mimetype(),
-                    kind=Kind.ATTACHMENT.value,
-                )
-            # Save a thumbnail (gzipped)
-            i.thumbnail((720, 720))
-            with BytesIO() as buf:
-                with GzipFile(mode="wb", fileobj=buf) as f1:
-                    i.save(f1, format=i.format)
-                buf.seek(0)
-                self.fs.put(
-                    buf,
-                    url=url,
-                    size=720,
-                    content_type=i.get_format_mimetype(),
-                    kind=Kind.ATTACHMENT.value,
-                )
-            return
-
-    def cache_attachment2(self, attachment: Dict[str, Any], remote_id: str) -> None:
+    def cache_attachment(self, attachment: Dict[str, Any], remote_id: str) -> None:
         url = attachment["url"]

         # Ensure it's not already there
-        if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
+        if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value, "remote_id": remote_id}):
             return

         # If it's an image, make some thumbnails
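Note the dedup key for attachments now includes remote_id: the same URL is cached once per activity instead of once globally. That may store duplicate bytes in GridFS, but it means the GC can delete all of an activity's media by remote_id without reference-counting other activities that share the URL.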
@@ -231,14 +175,6 @@ class MediaCache(object):
         )
         return str(oid)

-    def cache(self, url: str, kind: Kind) -> None:
-        if kind == Kind.ACTOR_ICON:
-            self.cache_actor_icon(url)
-        elif kind == Kind.OG_IMAGE:
-            self.cache_og_image(url)
-        else:
-            self.cache_attachment(url)
-
     def get_actor_icon(self, url: str, size: int) -> Any:
         return self.get_file(url, size, Kind.ACTOR_ICON)
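With the generic cache(url, kind) dispatcher gone, callers invoke the specific methods directly: cache_actor_icon(url) for icons, and the new cache_og_image(url, remote_id) and cache_attachment(attachment, remote_id) for media tied to an activity.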