# utils/webmentions.py
#
# Introduced by commit e46378286942fff2baff7a17ab2f254bcbe5da74
# ("Add webmentions utils", Thomas Sileo, 2019-07-16).
"""Helpers for discovering the Webmention endpoint advertised by a URL."""
import logging
from typing import Optional
from urllib.parse import urljoin
from urllib.parse import urlparse  # noqa: F401  (no longer used here; kept from the original imports)

import requests
from bs4 import BeautifulSoup
from little_boxes.urlutils import is_url_valid

logger = logging.getLogger(__name__)


def _make_abs(url: Optional[str], parent: str) -> Optional[str]:
    """Resolve a possibly relative ``url`` against ``parent``.

    Args:
        url: The endpoint reference as found in a ``Link`` header or an
            ``href`` attribute; may be absolute, relative, empty or ``None``.
        parent: The absolute URL of the document the reference came from.

    Returns:
        The absolute URL, or ``None`` when ``url`` is ``None``.
    """
    if url is None:
        return None

    # urljoin implements standard reference resolution: absolute URLs are kept,
    # while path-relative ("22/webmention"), root-relative ("/x"), query-only
    # ("?q=1"), scheme-relative ("//host/x") and empty references are resolved
    # against the parent document.
    return urljoin(parent, url.strip())


def _discover_webmention_endoint(url: str) -> Optional[str]:
    """Fetch ``url`` and return the first Webmention endpoint it advertises.

    The HTTP ``Link`` header is checked first, then the first ``<link>`` or
    ``<a>`` element (in document order) with ``rel="webmention"`` and an
    ``href`` attribute.

    Args:
        url: The target URL to inspect.

    Returns:
        The absolute endpoint URL, or ``None`` if the fetch failed or no
        endpoint is advertised. The result is not validated here.
    """
    try:
        resp = requests.get(url, timeout=3)
    except Exception as err:
        # Discovery is best-effort: any fetch failure means "no endpoint",
        # but leave a trace instead of failing silently.
        logger.info("Webmention discovery: could not fetch %s: %r", url, err)
        return None

    # Relative endpoints must be resolved against the URL that was actually
    # served, which differs from ``url`` when redirects were followed.
    base = resp.url or url

    for link in resp.links.values():
        # Match "webmention" as a whole rel token; a substring test would also
        # accept "not-webmention" or a link URL that merely contains the word.
        if "webmention" in link.get("rel", "").lower().split():
            return _make_abs(link.get("url"), base)

    soup = BeautifulSoup(resp.text, "html5lib")
    for tag in soup.find_all(["link", "a"], attrs={"rel": "webmention"}):
        href = tag.attrs.get("href")
        if href is not None:
            return _make_abs(href, base)

    return None


def discover_webmention_endoint(url: str) -> Optional[str]:
    """Discover the Webmention endpoint of a given URL, if any.

    Discovery is modelled on the test suite at https://webmention.rocks
    (``Link`` header first, then ``<link>``/``<a>`` elements, with relative
    endpoints resolved against the fetched document).

    Args:
        url: The target URL to inspect.

    Returns:
        The absolute endpoint URL, or ``None`` when no endpoint is found or
        the discovered endpoint is rejected by ``is_url_valid``.
    """
    wurl = _discover_webmention_endoint(url)
    if wurl is None or not is_url_valid(wurl):
        return None
    return wurl


# Correctly spelled alias; the original (misspelled) name is kept so existing
# callers continue to work.
discover_webmention_endpoint = discover_webmention_endoint