mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-11-15 03:04:28 +00:00
More opengraph tweaks
This commit is contained in:
parent
b43fa4556e
commit
c585f07857
1 changed files with 10 additions and 5 deletions
|
@ -1,3 +1,4 @@
|
|||
import logging
|
||||
import opengraph
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
@ -8,6 +9,8 @@ from little_boxes.urlutils import is_url_valid
|
|||
|
||||
from .lookup import lookup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def links_from_note(note):
|
||||
tags_href = set()
|
||||
|
@ -27,7 +30,7 @@ def links_from_note(note):
|
|||
|
||||
|
||||
def fetch_og_metadata(user_agent, links):
|
||||
htmls = []
|
||||
res = []
|
||||
for l in links:
|
||||
check_url(l)
|
||||
|
||||
|
@ -41,11 +44,13 @@ def fetch_og_metadata(user_agent, links):
|
|||
|
||||
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
|
||||
r.raise_for_status()
|
||||
htmls.append(r.text)
|
||||
|
||||
res = []
|
||||
for html in htmls:
|
||||
html = r.text
|
||||
try:
|
||||
data = dict(opengraph.OpenGraph(html=html))
|
||||
except Exception:
|
||||
logger.exception("failed to parse {l}")
|
||||
continue
|
||||
if data.get("url"):
|
||||
res.append(data)
|
||||
|
||||
|
|
Loading…
Reference in a new issue