forked from forks/microblog.pub
Fix OG metadata processing
This commit is contained in:
parent
87f035d298
commit
1a7e9e4565
2 changed files with 22 additions and 6 deletions
|
@ -112,10 +112,13 @@ async def process_next_incoming_activity(
|
|||
if next_activity.ap_object and next_activity.sent_by_ap_actor_id:
|
||||
try:
|
||||
async with db_session.begin_nested():
|
||||
await save_to_inbox(
|
||||
db_session,
|
||||
next_activity.ap_object,
|
||||
next_activity.sent_by_ap_actor_id,
|
||||
await asyncio.wait_for(
|
||||
save_to_inbox(
|
||||
db_session,
|
||||
next_activity.ap_object,
|
||||
next_activity.sent_by_ap_actor_id,
|
||||
),
|
||||
timeout=60,
|
||||
)
|
||||
except httpx.TimeoutException as exc:
|
||||
url = exc._request.url if exc._request else None
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
import mimetypes
|
||||
import re
|
||||
from typing import Any
|
||||
|
@ -36,7 +37,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
|||
# FIXME: some pages have no <title>, in which case soup.find("title") returns None
|
||||
raw = {
|
||||
"url": url,
|
||||
"title": soup.find("title").text,
|
||||
"title": soup.find("title").text.strip(),
|
||||
"image": None,
|
||||
"description": None,
|
||||
"site_name": urlparse(url).hostname,
|
||||
|
@ -124,9 +125,21 @@ async def og_meta_from_note(
|
|||
) -> list[dict[str, Any]]:
|
||||
og_meta = []
|
||||
urls = await external_urls(db_session, ro)
|
||||
logger.debug(f"Lookig OG metadata in {urls=}")
|
||||
for url in urls:
|
||||
logger.debug(f"Processing {url}")
|
||||
try:
|
||||
maybe_og_meta = await _og_meta_from_url(url)
|
||||
maybe_og_meta = None
|
||||
try:
|
||||
maybe_og_meta = await asyncio.wait_for(
|
||||
_og_meta_from_url(url),
|
||||
timeout=5,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.info(f"Timing out fetching {url}")
|
||||
except Exception:
|
||||
logger.exception(f"Failed scrap OG meta for {url}")
|
||||
|
||||
if maybe_og_meta:
|
||||
og_meta.append(maybe_og_meta.dict())
|
||||
except httpx.HTTPError:
|
||||
|
|
Loading…
Reference in a new issue