mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-11-15 03:04:28 +00:00
More opengraph tweaks
This commit is contained in:
parent
b43fa4556e
commit
c585f07857
1 changed file with 10 additions and 5 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
import logging
|
||||||
import opengraph
|
import opengraph
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
@ -8,6 +9,8 @@ from little_boxes.urlutils import is_url_valid
|
||||||
|
|
||||||
from .lookup import lookup
|
from .lookup import lookup
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def links_from_note(note):
|
def links_from_note(note):
|
||||||
tags_href = set()
|
tags_href = set()
|
||||||
|
@ -27,7 +30,7 @@ def links_from_note(note):
|
||||||
|
|
||||||
|
|
||||||
def fetch_og_metadata(user_agent, links):
|
def fetch_og_metadata(user_agent, links):
|
||||||
htmls = []
|
res = []
|
||||||
for l in links:
|
for l in links:
|
||||||
check_url(l)
|
check_url(l)
|
||||||
|
|
||||||
|
@ -41,11 +44,13 @@ def fetch_og_metadata(user_agent, links):
|
||||||
|
|
||||||
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
|
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
htmls.append(r.text)
|
|
||||||
|
|
||||||
res = []
|
html = r.text
|
||||||
for html in htmls:
|
try:
|
||||||
data = dict(opengraph.OpenGraph(html=html))
|
data = dict(opengraph.OpenGraph(html=html))
|
||||||
|
except Exception:
|
||||||
|
logger.exception("failed to parse {l}")
|
||||||
|
continue
|
||||||
if data.get("url"):
|
if data.get("url"):
|
||||||
res.append(data)
|
res.append(data)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue