diff --git a/blueprints/tasks.py b/blueprints/tasks.py
index cc40f2a..e47e218 100644
--- a/blueprints/tasks.py
+++ b/blueprints/tasks.py
@@ -7,6 +7,7 @@ from typing import Dict
import flask
import requests
+from bs4 import BeautifulSoup
from flask import current_app as app
from little_boxes import activitypub as ap
from little_boxes.activitypub import _to_list
@@ -271,7 +272,9 @@ def select_video_to_cache(links):
return videos[0]
-@blueprint.route("/task/cache_attachments", methods=["POST"])
+@blueprint.route(
+ "/task/cache_attachments", methods=["POST"]
+) # noqa: C901 # too complex
def task_cache_attachments() -> _Response:
task = p.parse(flask.request)
app.logger.info(f"task={task!r}")
@@ -286,6 +289,13 @@ def task_cache_attachments() -> _Response:
else:
obj = activity
+ if obj.content:
+ content_html = BeautifulSoup(obj.content, "html5lib")
+ for img in content_html.find_all("img"):
+ src = img.attrs.get("src")
+ if src:
+ Tasks.cache_attachment({"url": src}, iri)
+
if obj.has_type(ap.ActivityType.VIDEO):
if isinstance(obj.url, list):
# TODO: filter only videogt
diff --git a/templates/utils.html b/templates/utils.html
index e0b774e..928466a 100644
--- a/templates/utils.html
+++ b/templates/utils.html
@@ -137,7 +137,7 @@
{% if obj | has_type(['Article', 'Page']) %}
{{ obj.name }} {{ obj | url_or_id | get_url }}
{% elif obj | has_type('Question') %}
- {{ obj.content | clean | replace_custom_emojis(obj) | code_highlight | safe }}
+ {{ obj.content | clean | update_inline_imgs | replace_custom_emojis(obj) | code_highlight | safe }}
@@ -206,7 +206,7 @@
{% else %}
- {{ obj.content | clean | replace_custom_emojis(obj) | code_highlight | safe }}
+ {{ obj.content | clean | update_inline_imgs | replace_custom_emojis(obj) | code_highlight | safe }}
{% endif %}
{% if obj | has_place %}
diff --git a/utils/template_filters.py b/utils/template_filters.py
index 6cf6d75..b2fb0eb 100644
--- a/utils/template_filters.py
+++ b/utils/template_filters.py
@@ -9,6 +9,7 @@ import emoji_unicode
import flask
import html2text
import timeago
+from bs4 import BeautifulSoup
from cachetools import LRUCache
from little_boxes import activitypub as ap
from little_boxes.activitypub import _to_list
@@ -104,8 +105,16 @@ ALLOWED_TAGS = [
"tfoot",
"colgroup",
"caption",
+ "img",
]
+ALLOWED_ATTRIBUTES = {  # per-tag attribute allow-list passed to bleach.clean()
+ "a": ["href", "title"],
+ "abbr": ["title"],
+ "acronym": ["title"],
+ "img": ["src", "alt", "title"],  # goes with the "img" entry in ALLOWED_TAGS
+}
+
@filters.app_template_filter()
def replace_custom_emojis(content, note):
@@ -126,7 +135,9 @@ def replace_custom_emojis(content, note):
def clean_html(html):
try:
- return bleach.clean(html, tags=ALLOWED_TAGS, strip=True)
+ return bleach.clean(  # sanitize against the tag and attribute allow-lists
+ html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, strip=True
+ )  # NOTE(review): any exception is swallowed and replaced by the string below
except Exception:
return "failed to clean HTML"
@@ -333,6 +344,21 @@ def get_attachment_url(url, size):
return _get_file_url(url, size, Kind.ATTACHMENT)
+@filters.app_template_filter()
+def update_inline_imgs(content):
+    """Return `content` with every <img> src rewritten via `_get_file_url` (720)."""
+    # Name the parser explicitly: bs4 otherwise guesses from whatever is installed.
+    # html5lib (also used when caching) always builds the <body> that is read below.
+    soup = BeautifulSoup(content, "html5lib")
+    imgs = soup.find_all("img")
+    if not imgs:
+        return content  # nothing to rewrite: hand the input back untouched
+    for img in imgs:
+        if img.attrs.get("src"):  # leave <img> tags without a usable src alone
+            img.attrs["src"] = _get_file_url(img.attrs["src"], 720, Kind.ATTACHMENT)
+    return soup.find("body").decode_contents()
+
+
@filters.app_template_filter()
def get_video_url(url):
if isinstance(url, list):