diff --git a/app.py b/app.py
index b01992c..d868d95 100644
--- a/app.py
+++ b/app.py
@@ -238,6 +238,11 @@ def get_attachment_url(url, size):
     return _get_file_url(url, size, Kind.ATTACHMENT)
 
 
+@app.template_filter()
+def get_og_image_url(url, size=100):
+    return _get_file_url(url, size, Kind.OG_IMAGE)
+
+
 @app.template_filter()
 def permalink_id(val):
     return str(hash(val))
diff --git a/sass/base_theme.scss b/sass/base_theme.scss
index df2c4b3..e7b5b71 100644
--- a/sass/base_theme.scss
+++ b/sass/base_theme.scss
@@ -189,9 +189,11 @@ a:hover {
         h3 { margin: 0; }
     }
 }
+.note-box {
+    margin-bottom: 70px;
+}
 .note {
     display: flex;
-    margin-bottom: 70px;
     .l {
         color: $color-note-link;
     }
@@ -229,7 +231,11 @@ a:hover {
         padding:10px 0;
     }
 }
-
+.color-menu-background {
+    background: $color-menu-background;
+}
+.og-link { text-decoration: none; }
+.og-link:hover { text-decoration: none; }
 .bar-item-no-hover {
     background: $color-menu-background;
     padding: 5px;
diff --git a/tasks.py b/tasks.py
index 3416dce..1ca0134 100644
--- a/tasks.py
+++ b/tasks.py
@@ -19,6 +19,7 @@ from config import ID
 from config import KEY
 from config import MEDIA_CACHE
 from config import USER_AGENT
+from utils import opengraph
 from utils.media import Kind
 
 log = logging.getLogger(__name__)
@@ -103,12 +104,59 @@ def process_new_activity(self, iri: str) -> None:
         self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries))
 
 
+@app.task(bind=True, max_retries=12)  # noqa: C901
+def fetch_og_metadata(self, iri: str) -> None:
+    """Fetch OpenGraph metadata for the links of a Create activity's note.
+
+    The metadata is stored under `meta.og_metadata` on the activity whose
+    `remote_id` is `iri`, and every OG image found is cached in MEDIA_CACHE.
+    Client errors (4xx) and gone/missing activities are dropped; any other
+    failure is retried with a countdown that grows with the retry count.
+    """
+    try:
+        activity = ap.fetch_remote_activity(iri)
+        log.info(f"activity={activity!r}")
+        if activity.has_type(ap.ActivityType.CREATE):
+            note = activity.get_object()
+            links = opengraph.links_from_note(note.to_dict())
+            og_metadata = opengraph.fetch_og_metadata(USER_AGENT, links)
+            for og in og_metadata:
+                if not og.get("image"):
+                    continue
+                MEDIA_CACHE.cache_og_image(og["image"])
+
+            log.debug(f"OG metadata {og_metadata!r}")
+            DB.activities.update_one(
+                {"remote_id": iri}, {"$set": {"meta.og_metadata": og_metadata}}
+            )
+
+        log.info(f"OG metadata fetched for {iri}")
+    except (ActivityGoneError, ActivityNotFoundError):
+        log.exception(f"dropping activity {iri}, skip OG metadata")
+    except requests.exceptions.HTTPError as http_err:
+        if 400 <= http_err.response.status_code < 500:
+            # A client error will not fix itself: give up here instead of
+            # falling through to the retry below.
+            log.exception("bad request, no retry")
+            return
+        log.exception("failed to fetch OG metadata")
+        self.retry(
+            exc=http_err, countdown=int(random.uniform(2, 4) ** self.request.retries)
+        )
+    except Exception as err:
+        log.exception(f"failed to fetch OG metadata for {iri}")
+        self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries))
+
+
 @app.task(bind=True, max_retries=12)
 def cache_actor(self, iri: str, also_cache_attachments: bool = True) -> None:
     try:
         activity = ap.fetch_remote_activity(iri)
         log.info(f"activity={activity!r}")
 
+        if activity.has_type(ap.ActivityType.CREATE):
+            fetch_og_metadata.delay(iri)
+
         actor = activity.get_actor()
 
         cache_actor_with_inbox = False
diff --git a/templates/utils.html b/templates/utils.html
index 3f267b5..db64c04 100644
--- a/templates/utils.html
+++ b/templates/utils.html
@@ -21,6 +21,7 @@
 {% else %}
 {% set actor = obj.attributedTo | get_actor %}
 {% endif %}
+
@@ -63,6 +64,26 @@
{% endif %} + + +{% if meta and meta.og_metadata %} +{% for og in meta.og_metadata %} + +
+ +
+
+{{ og.title }} +

{{ og.description | truncate(80) }}

+{{ og.site_name }} +
+
+{% endfor %} +{% endif %} + + + +
{% if perma %}{{ obj.published | format_time }} {% if not (obj.id | is_from_outbox) %} @@ -163,10 +184,10 @@ {% endif %} - +
+
-
 {%- endmacro %}
diff --git a/utils/media.py b/utils/media.py
index 13e2b0a..6767514 100644
--- a/utils/media.py
+++ b/utils/media.py
@@ -31,6 +31,7 @@ class Kind(Enum):
     ATTACHMENT = "attachment"
     ACTOR_ICON = "actor_icon"
     UPLOAD = "upload"
+    OG_IMAGE = "og"
 
 
 class MediaCache(object):
@@ -38,6 +39,29 @@ class MediaCache(object):
         self.fs = gridfs.GridFS(gridfs_db)
         self.user_agent = user_agent
 
+    def cache_og_image(self, url: str) -> None:
+        """Cache a gzipped (100, 100) thumbnail of the OG image at `url`.
+
+        Does nothing when a file with this URL and the OG_IMAGE kind is
+        already stored.
+        """
+        if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
+            return
+        i = load(url, self.user_agent)
+        # Store only the (100, 100) thumbnail, gzipped - not the original image
+        i.thumbnail((100, 100))
+        with BytesIO() as buf:
+            with GzipFile(mode="wb", fileobj=buf) as f1:
+                i.save(f1, format=i.format)
+            buf.seek(0)
+            self.fs.put(
+                buf,
+                url=url,
+                size=100,
+                content_type=i.get_format_mimetype(),
+                kind=Kind.OG_IMAGE.value,
+            )
+
     def cache_attachment(self, url: str) -> None:
         if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
             return
@@ -141,6 +165,8 @@ class MediaCache(object):
     def cache(self, url: str, kind: Kind) -> None:
         if kind == Kind.ACTOR_ICON:
             self.cache_actor_icon(url)
+        elif kind == Kind.OG_IMAGE:
+            self.cache_og_image(url)
         else:
             self.cache_attachment(url)
 
diff --git a/utils/opengraph.py b/utils/opengraph.py
index b543269..762e5ef 100644
--- a/utils/opengraph.py
+++ b/utils/opengraph.py
@@ -23,24 +23,16 @@ def links_from_note(note):
     return links
 
 
-def fetch_og_metadata(user_agent, col, remote_id):
-    doc = col.find_one({"remote_id": remote_id})
-    if not doc:
-        raise ValueError
-    note = doc["activity"]["object"]
-    print(note)
-    links = links_from_note(note)
-    if not links:
-        return 0
-    # FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
+def fetch_og_metadata(user_agent, links):
+    """Download each URL in `links` and return its OpenGraph metadata.
+
+    Returns a list with one dict per link, in the order of `links`. Every URL
+    goes through `check_url` first; an HTTP error status raises.
+    """
     htmls = []
     for l in links:
         check_url(l)
-        r = requests.get(l, headers={"User-Agent": user_agent})
+        r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
         r.raise_for_status()
         htmls.append(r.text)
-    links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls]
-    col.update_one(
-        {"remote_id": remote_id}, {"$set": {"meta.og_metadata": links_og_metadata}}
-    )
-    return len(links)
+    return [dict(opengraph.OpenGraph(html=html)) for html in htmls]