Improve OpenGraph metadata

This commit is contained in:
Thomas Sileo 2022-08-02 22:22:15 +02:00
parent e28b988a26
commit 73a7a3ee21
2 changed files with 18 additions and 11 deletions

View file

@ -315,13 +315,13 @@
<div> <div>
<img src="{{ og_meta.image | media_proxy_url }}" style="max-width:200px;max-height:100px;"> <img src="{{ og_meta.image | media_proxy_url }}" style="max-width:200px;max-height:100px;">
</div> </div>
{% endif %}
<div> <div>
<a href="{{ og_meta.url }}">{{ og_meta.title }}</a> <a href="{{ og_meta.url }}">{{ og_meta.title }}</a>
{% if og_meta.site_name %} {% if og_meta.site_name %}
<small style="display:block;">{{ og_meta.site_name }}</small> <small style="display:block;">{{ og_meta.site_name }}</small>
{% endif %} {% endif %}
</div> </div>
{% endif %}
</div> </div>
{% endfor %} {% endfor %}
{% endif %} {% endif %}

View file

@ -15,24 +15,31 @@ from app.utils.url import is_url_valid
class OpenGraphMeta(BaseModel): class OpenGraphMeta(BaseModel):
url: str url: str
title: str title: str
image: str image: str | None
description: str description: str | None
site_name: str | None = None site_name: str
def _scrap_og_meta(html: str) -> OpenGraphMeta | None: def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
soup = BeautifulSoup(html, "html5lib") soup = BeautifulSoup(html, "html5lib")
ogs = { ogs = {
og.attrs["property"]: og.attrs.get("content") og.attrs["property"]: og.attrs.get("content")
for og in soup.html.head.findAll(property=re.compile(r"^og")) for og in soup.html.head.findAll(property=re.compile(r"^og"))
} }
raw = {} raw = {
"url": url,
"title": soup.find("title").text,
"image": None,
"description": None,
"site_name": urlparse(url).netloc,
}
for field in OpenGraphMeta.__fields__.keys(): for field in OpenGraphMeta.__fields__.keys():
og_field = f"og:{field}" og_field = f"og:{field}"
if not ogs.get(og_field) and field != "site_name": if ogs.get(og_field):
return None raw[field] = ogs.get(og_field, None)
raw[field] = ogs.get(og_field, None) if "title" not in raw:
return None
return OpenGraphMeta.parse_obj(raw) return OpenGraphMeta.parse_obj(raw)
@ -58,7 +65,7 @@ def _urls_from_note(note: ap.RawObject) -> set[str]:
and is_url_valid(h) and is_url_valid(h)
and ( and (
not mimetype not mimetype
or mimetype.split("/")[0] in ["image", "video", "audio"] or mimetype.split("/")[0] not in ["image", "video", "audio"]
) )
): ):
urls.add(h) urls.add(h)
@ -81,7 +88,7 @@ async def _og_meta_from_url(url: str) -> OpenGraphMeta | None:
if not (ct := resp.headers.get("content-type")) or not ct.startswith("text/html"): if not (ct := resp.headers.get("content-type")) or not ct.startswith("text/html"):
return None return None
return _scrap_og_meta(resp.text) return _scrap_og_meta(url, resp.text)
async def og_meta_from_note(note: ap.RawObject) -> list[dict[str, Any]]: async def og_meta_from_note(note: ap.RawObject) -> list[dict[str, Any]]: