mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-12-22 13:14:28 +00:00
Fix OG metadata processing
This commit is contained in:
parent
87f035d298
commit
1a7e9e4565
2 changed files with 22 additions and 6 deletions
|
@ -112,10 +112,13 @@ async def process_next_incoming_activity(
|
||||||
if next_activity.ap_object and next_activity.sent_by_ap_actor_id:
|
if next_activity.ap_object and next_activity.sent_by_ap_actor_id:
|
||||||
try:
|
try:
|
||||||
async with db_session.begin_nested():
|
async with db_session.begin_nested():
|
||||||
await save_to_inbox(
|
await asyncio.wait_for(
|
||||||
|
save_to_inbox(
|
||||||
db_session,
|
db_session,
|
||||||
next_activity.ap_object,
|
next_activity.ap_object,
|
||||||
next_activity.sent_by_ap_actor_id,
|
next_activity.sent_by_ap_actor_id,
|
||||||
|
),
|
||||||
|
timeout=60,
|
||||||
)
|
)
|
||||||
except httpx.TimeoutException as exc:
|
except httpx.TimeoutException as exc:
|
||||||
url = exc._request.url if exc._request else None
|
url = exc._request.url if exc._request else None
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import asyncio
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
@ -36,7 +37,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
||||||
# FIXME some page have no <title>
|
# FIXME some page have no <title>
|
||||||
raw = {
|
raw = {
|
||||||
"url": url,
|
"url": url,
|
||||||
"title": soup.find("title").text,
|
"title": soup.find("title").text.strip(),
|
||||||
"image": None,
|
"image": None,
|
||||||
"description": None,
|
"description": None,
|
||||||
"site_name": urlparse(url).hostname,
|
"site_name": urlparse(url).hostname,
|
||||||
|
@ -124,9 +125,21 @@ async def og_meta_from_note(
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
og_meta = []
|
og_meta = []
|
||||||
urls = await external_urls(db_session, ro)
|
urls = await external_urls(db_session, ro)
|
||||||
|
logger.debug(f"Lookig OG metadata in {urls=}")
|
||||||
for url in urls:
|
for url in urls:
|
||||||
|
logger.debug(f"Processing {url}")
|
||||||
try:
|
try:
|
||||||
maybe_og_meta = await _og_meta_from_url(url)
|
maybe_og_meta = None
|
||||||
|
try:
|
||||||
|
maybe_og_meta = await asyncio.wait_for(
|
||||||
|
_og_meta_from_url(url),
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.info(f"Timing out fetching {url}")
|
||||||
|
except Exception:
|
||||||
|
logger.exception(f"Failed scrap OG meta for {url}")
|
||||||
|
|
||||||
if maybe_og_meta:
|
if maybe_og_meta:
|
||||||
og_meta.append(maybe_og_meta.dict())
|
og_meta.append(maybe_og_meta.dict())
|
||||||
except httpx.HTTPError:
|
except httpx.HTTPError:
|
||||||
|
|
Loading…
Reference in a new issue