From 8cfac8df6a4cbc005b798fb18ca94919534f33eb Mon Sep 17 00:00:00 2001
From: Thomas Sileo
Date: Thu, 4 Aug 2022 07:31:18 +0200
Subject: [PATCH] Allow to replace URL dynamically (for Nitter, Teddit...)

---
Reviewer notes (text between the three-dash line and the first "diff --git"
is ignored when the patch is applied and is not part of the commit message):

* app/config.py: adds an optional `privacy_replace` list of
  `{domain, replace_by}` entries and exposes it as `PRIVACY_REPLACE`, a
  dict mapping domain -> replacement, or None when the list is unset/empty.
* app/ap_object.py: when `PRIVACY_REPLACE` is set, the rendered content is
  re-parsed with BeautifulSoup/html5lib and every `<a href>` whose netloc
  (minus one leading "www.") is a configured domain gets its netloc swapped.
  The lookup is an exact dict match on the netloc, so other subdomains,
  explicit ports or differently-cased hosts are left untouched.
* docs/user_guide.md: documents the new config section.

 app/ap_object.py   | 19 +++++++++++++++++--
 app/config.py      |  9 +++++++++
 docs/user_guide.md | 22 ++++++++++++++++++++++
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/app/ap_object.py b/app/ap_object.py
index 16b91cd..72fd871 100644
--- a/app/ap_object.py
+++ b/app/ap_object.py
@@ -2,6 +2,7 @@ import hashlib
 from datetime import datetime
 from functools import cached_property
 from typing import Any
+from urllib.parse import urlparse
 
 import pydantic
 from bs4 import BeautifulSoup  # type: ignore
@@ -11,6 +12,7 @@ from app import activitypub as ap
 from app.actor import LOCAL_ACTOR
 from app.actor import Actor
 from app.actor import RemoteActor
+from app.config import PRIVACY_REPLACE
 from app.media import proxied_media_url
 from app.utils.datetime import now
 from app.utils.datetime import parse_isoformat
@@ -175,9 +177,22 @@ class Object:
 
         # PeerTube returns the content as markdown
         if self.ap_object.get("mediaType") == "text/markdown":
-            return markdown(content, extensions=["mdx_linkify"])
+            content = markdown(content, extensions=["mdx_linkify"])
 
-        return content
+        if not PRIVACY_REPLACE:
+            return content
+
+        soup = BeautifulSoup(content, "html5lib")
+        links = soup.find_all("a", href=True)
+
+        for link in links:
+            parsed_href = urlparse(link.attrs["href"])
+            if new_netloc := PRIVACY_REPLACE.get(
+                parsed_href.netloc.removeprefix("www.")
+            ):
+                link.attrs["href"] = parsed_href._replace(netloc=new_netloc).geturl()
+
+        return soup.find("body").decode_contents()
 
     @property
     def summary(self) -> str | None:
diff --git a/app/config.py b/app/config.py
index d567e8f..e64060c 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,6 +31,11 @@ USER_AGENT = f"microblogpub/{VERSION}"
 AP_CONTENT_TYPE = "application/activity+json"
 
 
+class _PrivacyReplace(pydantic.BaseModel):
+    domain: str
+    replace_by: str
+
+
 class Config(pydantic.BaseModel):
     domain: str
     username: str
@@ -43,6 +48,7 @@ class Config(pydantic.BaseModel):
     debug: bool = False
     trusted_hosts: list[str] = ["127.0.0.1"]
     manually_approves_followers: bool = False
+    privacy_replace: list[_PrivacyReplace] | None = None
 
     # Config items to make tests easier
     sqlalchemy_database: str | None = None
@@ -84,6 +90,9 @@ _SCHEME = "https" if CONFIG.https else "http"
 ID = f"{_SCHEME}://{DOMAIN}"
 USERNAME = CONFIG.username
 MANUALLY_APPROVES_FOLLOWERS = CONFIG.manually_approves_followers
+PRIVACY_REPLACE = None
+if CONFIG.privacy_replace:
+    PRIVACY_REPLACE = {pr.domain: pr.replace_by for pr in CONFIG.privacy_replace}
 BASE_URL = ID
 DEBUG = CONFIG.debug
 DB_PATH = CONFIG.sqlalchemy_database or ROOT_DIR / "data" / "microblogpub.db"
diff --git a/docs/user_guide.md b/docs/user_guide.md
index fa3bd9c..7109119 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -31,6 +31,28 @@ You can tweak your profile by tweaking these items:
 
 Whenever one of these config items is updated, an `Update` activity will be sent to all know server to update your remote profile.
 
+### Privacy replace
+
+You can define domain to be rewrited to more "privacy friendly" alternatives, like [Invidious](https://invidious.io/)
+or [Nitter](https://nitter.net/about).
+
+To do so, just add as these extra config items, this is a sample config that rewrite URLs for Twitter, Youtube, Reddit and Medium:
+
+```toml
+[[privacy_replace]]
+domain = "youtube.com"
+replace_by ="yewtu.be"
+[[privacy_replace]]
+domain = "twitter.com"
+replace_by = "nitter.net"
+[[privacy_replace]]
+domain = "medium.com"
+replace_by = "scribe.rip"
+[[privacy_replace]]
+domain = "reddit.com"
+replace_by = "teddit.net"
+```
+
 ### Customization
 
 #### Custom emoji