Fix OG metadata processing

Thomas Sileo 2022-08-28 19:05:06 +02:00
parent 87f035d298
commit 1a7e9e4565
2 changed files with 22 additions and 6 deletions


@@ -112,10 +112,13 @@ async def process_next_incoming_activity(
     if next_activity.ap_object and next_activity.sent_by_ap_actor_id:
         try:
             async with db_session.begin_nested():
-                await save_to_inbox(
-                    db_session,
-                    next_activity.ap_object,
-                    next_activity.sent_by_ap_actor_id,
+                await asyncio.wait_for(
+                    save_to_inbox(
+                        db_session,
+                        next_activity.ap_object,
+                        next_activity.sent_by_ap_actor_id,
+                    ),
+                    timeout=60,
                 )
         except httpx.TimeoutException as exc:
             url = exc._request.url if exc._request else None
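The pattern introduced above is asyncio.wait_for, which cancels the awaited coroutine and raises asyncio.TimeoutError once the timeout expires, so a hung save_to_inbox call can no longer block the incoming-activity worker indefinitely. A minimal standalone sketch of the same pattern (slow_operation and the 60-second budget are illustrative, not part of this commit):

import asyncio


async def slow_operation() -> str:
    # Stand-in for a call like save_to_inbox that may hang on a slow remote server.
    await asyncio.sleep(120)
    return "done"


async def main() -> None:
    try:
        # Cancel slow_operation and raise asyncio.TimeoutError after 60 seconds.
        print(await asyncio.wait_for(slow_operation(), timeout=60))
    except asyncio.TimeoutError:
        print("timed out, giving up for now")


asyncio.run(main())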


@@ -1,3 +1,4 @@
+import asyncio
 import mimetypes
 import re
 from typing import Any
@@ -36,7 +37,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
     # FIXME some page have no <title>
     raw = {
         "url": url,
-        "title": soup.find("title").text,
+        "title": soup.find("title").text.strip(),
         "image": None,
         "description": None,
         "site_name": urlparse(url).hostname,
@@ -124,9 +125,21 @@ async def og_meta_from_note(
 ) -> list[dict[str, Any]]:
     og_meta = []
     urls = await external_urls(db_session, ro)
+    logger.debug(f"Lookig OG metadata in {urls=}")
     for url in urls:
+        logger.debug(f"Processing {url}")
         try:
-            maybe_og_meta = await _og_meta_from_url(url)
+            maybe_og_meta = None
+            try:
+                maybe_og_meta = await asyncio.wait_for(
+                    _og_meta_from_url(url),
+                    timeout=5,
+                )
+            except asyncio.TimeoutError:
+                logger.info(f"Timing out fetching {url}")
+            except Exception:
+                logger.exception(f"Failed scrap OG meta for {url}")
             if maybe_og_meta:
                 og_meta.append(maybe_og_meta.dict())
         except httpx.HTTPError:
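The new inner try/except bounds each metadata fetch to 5 seconds and logs timeouts or scraping errors per URL instead of letting them abort the loop, so one unreachable link no longer prevents OG metadata from being collected for the remaining ones. A self-contained sketch of that per-URL pattern (fetch_meta and collect_meta are illustrative stand-ins, not the project's functions):

import asyncio
import logging

logger = logging.getLogger(__name__)


async def fetch_meta(url: str) -> dict | None:
    # Stand-in for _og_meta_from_url: fetch and parse a remote page.
    await asyncio.sleep(0.1)
    return {"url": url, "title": "example"}


async def collect_meta(urls: list[str]) -> list[dict]:
    results: list[dict] = []
    for url in urls:
        meta = None
        try:
            # Bound each fetch so one slow host cannot stall the whole loop.
            meta = await asyncio.wait_for(fetch_meta(url), timeout=5)
        except asyncio.TimeoutError:
            logger.info("timed out fetching %s", url)
        except Exception:
            logger.exception("failed to fetch metadata for %s", url)
        if meta:
            results.append(meta)
    return results


print(asyncio.run(collect_meta(["https://example.com"])))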