mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-12-22 13:14:28 +00:00
Improve opengraph metadata
This commit is contained in:
parent
e28b988a26
commit
73a7a3ee21
2 changed files with 18 additions and 11 deletions
|
@ -315,13 +315,13 @@
|
||||||
<div>
|
<div>
|
||||||
<img src="{{ og_meta.image | media_proxy_url }}" style="max-width:200px;max-height:100px;">
|
<img src="{{ og_meta.image | media_proxy_url }}" style="max-width:200px;max-height:100px;">
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<a href="{{ og_meta.url }}">{{ og_meta.title }}</a>
|
<a href="{{ og_meta.url }}">{{ og_meta.title }}</a>
|
||||||
{% if og_meta.site_name %}
|
{% if og_meta.site_name %}
|
||||||
<small style="display:block;">{{ og_meta.site_name }}</small>
|
<small style="display:block;">{{ og_meta.site_name }}</small>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
|
@ -15,24 +15,31 @@ from app.utils.url import is_url_valid
|
||||||
class OpenGraphMeta(BaseModel):
|
class OpenGraphMeta(BaseModel):
|
||||||
url: str
|
url: str
|
||||||
title: str
|
title: str
|
||||||
image: str
|
image: str | None
|
||||||
description: str
|
description: str | None
|
||||||
site_name: str | None = None
|
site_name: str
|
||||||
|
|
||||||
|
|
||||||
def _scrap_og_meta(html: str) -> OpenGraphMeta | None:
|
def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
||||||
soup = BeautifulSoup(html, "html5lib")
|
soup = BeautifulSoup(html, "html5lib")
|
||||||
ogs = {
|
ogs = {
|
||||||
og.attrs["property"]: og.attrs.get("content")
|
og.attrs["property"]: og.attrs.get("content")
|
||||||
for og in soup.html.head.findAll(property=re.compile(r"^og"))
|
for og in soup.html.head.findAll(property=re.compile(r"^og"))
|
||||||
}
|
}
|
||||||
raw = {}
|
raw = {
|
||||||
|
"url": url,
|
||||||
|
"title": soup.find("title").text,
|
||||||
|
"image": None,
|
||||||
|
"description": None,
|
||||||
|
"site_name": urlparse(url).netloc,
|
||||||
|
}
|
||||||
for field in OpenGraphMeta.__fields__.keys():
|
for field in OpenGraphMeta.__fields__.keys():
|
||||||
og_field = f"og:{field}"
|
og_field = f"og:{field}"
|
||||||
if not ogs.get(og_field) and field != "site_name":
|
if ogs.get(og_field):
|
||||||
return None
|
raw[field] = ogs.get(og_field, None)
|
||||||
|
|
||||||
raw[field] = ogs.get(og_field, None)
|
if "title" not in raw:
|
||||||
|
return None
|
||||||
|
|
||||||
return OpenGraphMeta.parse_obj(raw)
|
return OpenGraphMeta.parse_obj(raw)
|
||||||
|
|
||||||
|
@ -58,7 +65,7 @@ def _urls_from_note(note: ap.RawObject) -> set[str]:
|
||||||
and is_url_valid(h)
|
and is_url_valid(h)
|
||||||
and (
|
and (
|
||||||
not mimetype
|
not mimetype
|
||||||
or mimetype.split("/")[0] in ["image", "video", "audio"]
|
or mimetype.split("/")[0] not in ["image", "video", "audio"]
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
urls.add(h)
|
urls.add(h)
|
||||||
|
@ -81,7 +88,7 @@ async def _og_meta_from_url(url: str) -> OpenGraphMeta | None:
|
||||||
if not (ct := resp.headers.get("content-type")) or not ct.startswith("text/html"):
|
if not (ct := resp.headers.get("content-type")) or not ct.startswith("text/html"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return _scrap_og_meta(resp.text)
|
return _scrap_og_meta(url, resp.text)
|
||||||
|
|
||||||
|
|
||||||
async def og_meta_from_note(note: ap.RawObject) -> list[dict[str, Any]]:
|
async def og_meta_from_note(note: ap.RawObject) -> list[dict[str, Any]]:
|
||||||
|
|
Loading…
Reference in a new issue