microblog.pub/app/utils/url.py

80 lines
2 KiB
Python
Raw Permalink Normal View History

2022-06-22 18:11:22 +00:00
import functools
import ipaddress
import socket
from urllib.parse import urlparse
from loguru import logger
2022-08-15 08:15:00 +00:00
from app.config import BLOCKED_SERVERS
2022-06-22 18:11:22 +00:00
from app.config import DEBUG
2022-07-10 09:04:28 +00:00
def make_abs(url: str | None, parent: str) -> str | None:
if url is None:
return None
if url.startswith("http"):
return url
return (
urlparse(parent)._replace(path=url, params="", query="", fragment="").geturl()
)
2022-06-22 18:11:22 +00:00
class InvalidURLError(Exception):
    """Raised by ``check_url`` when a URL fails ``is_url_valid``."""
2022-07-15 18:50:27 +00:00
@functools.lru_cache(maxsize=256)
def _getaddrinfo(hostname: str, port: int) -> str:
    """Return an IP address for ``hostname`` as a string.

    If ``hostname`` is already an IP literal it is returned in its
    normalised string form and no lookup is performed. Otherwise the name
    is resolved with ``socket.getaddrinfo`` and the address of the first
    result is returned. Results are memoised per ``(hostname, port)`` pair.

    Raises:
        socket.gaierror: If the lookup fails (logged before re-raising).
    """
    try:
        literal = ipaddress.ip_address(hostname)
    except ValueError:
        # Not an IP literal, so resolve the name.
        try:
            candidates = socket.getaddrinfo(hostname, port)
        except socket.gaierror:
            logger.exception(f"failed to lookup addr info for {hostname}")
            raise
        # Each entry is (family, type, proto, canonname, sockaddr); the
        # address is the first element of sockaddr.
        ip_address = candidates[0][4][0]
        logger.debug(f"DNS lookup: {hostname} -> {ip_address}")
        return ip_address
    else:
        return str(literal)
def is_url_valid(url: str) -> bool:
    """Implements basic SSRF protection.

    A URL is accepted only when it uses the ``http`` or ``https`` scheme,
    has a hostname other than ``localhost``, is not listed in
    ``BLOCKED_SERVERS`` and resolves to an address that is not private.
    When ``DEBUG`` is set, any ``http``/``https`` URL is accepted.

    Args:
        url: The URL to validate.

    Returns:
        ``True`` if the URL may be requested, ``False`` otherwise. URLs that
        cannot be parsed (unbalanced IPv6 brackets, invalid port) are
        reported as invalid rather than raising.

    Raises:
        socket.gaierror: If the hostname cannot be resolved.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects e.g. unbalanced IPv6 brackets ("http://[::1").
        return False

    if parsed.scheme not in ("http", "https"):
        return False

    # XXX in debug mode, we want to allow requests to localhost to test the
    # federation with local instances
    if DEBUG:  # pragma: no cover
        return True

    # urlparse already lower-cases the hostname.
    hostname = parsed.hostname
    if not hostname or hostname == "localhost":
        return False

    if hostname in BLOCKED_SERVERS:
        logger.warning(f"{hostname} is blocked")
        return False

    try:
        explicit_port = parsed.port
    except ValueError:
        # Non-numeric or out-of-range port: the URL is malformed.
        return False
    default_port = 80 if parsed.scheme == "http" else 443

    ip_address = _getaddrinfo(hostname, explicit_port or default_port)
    logger.debug(f"{ip_address=}")

    if ipaddress.ip_address(ip_address).is_private:
        logger.info(f"rejecting private URL {url} -> {ip_address}")
        return False

    return True
2022-07-15 18:55:37 +00:00
@functools.lru_cache(maxsize=512)
def check_url(url: str) -> None:
    """Raise ``InvalidURLError`` unless ``url`` passes ``is_url_valid``.

    Successful checks are memoised by ``lru_cache``; a call that raises is
    not cached, so it is evaluated again on the next call.

    Raises:
        InvalidURLError: If ``is_url_valid(url)`` returns a falsy value.
    """
    logger.debug(f"check_url {url=}")

    if is_url_valid(url):
        return

    raise InvalidURLError(f'"{url}" is invalid')