mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-12-22 13:14:28 +00:00
Fix OG meta
This commit is contained in:
parent
6b8a4d94b2
commit
5fc06ccfac
1 changed files with 7 additions and 1 deletions
|
@ -5,6 +5,7 @@ from urllib.parse import urlparse
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from bs4 import BeautifulSoup # type: ignore
|
from bs4 import BeautifulSoup # type: ignore
|
||||||
|
from loguru import logger
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from app import ap_object
|
from app import ap_object
|
||||||
|
@ -32,6 +33,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
||||||
og.attrs["property"]: og.attrs.get("content")
|
og.attrs["property"]: og.attrs.get("content")
|
||||||
for og in soup.html.head.findAll(property=re.compile(r"^og"))
|
for og in soup.html.head.findAll(property=re.compile(r"^og"))
|
||||||
}
|
}
|
||||||
|
# FIXME some page have no <title>
|
||||||
raw = {
|
raw = {
|
||||||
"url": url,
|
"url": url,
|
||||||
"title": soup.find("title").text,
|
"title": soup.find("title").text,
|
||||||
|
@ -109,7 +111,11 @@ async def _og_meta_from_url(url: str) -> OpenGraphMeta | None:
|
||||||
if not (ct := resp.headers.get("content-type")) or not ct.startswith("text/html"):
|
if not (ct := resp.headers.get("content-type")) or not ct.startswith("text/html"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return _scrap_og_meta(url, resp.text)
|
try:
|
||||||
|
return _scrap_og_meta(url, resp.text)
|
||||||
|
except Exception:
|
||||||
|
logger.info(f"Failed to scrap OG meta for {url}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def og_meta_from_note(
|
async def og_meta_from_note(
|
||||||
|
|
Loading…
Reference in a new issue