Improve privacy replace

This commit is contained in:
Thomas Sileo 2022-08-04 19:11:14 +02:00
parent 6d2db24c33
commit d378e17173
5 changed files with 13 additions and 17 deletions

View file

@ -2,7 +2,6 @@ import hashlib
from datetime import datetime from datetime import datetime
from functools import cached_property from functools import cached_property
from typing import Any from typing import Any
from urllib.parse import urlparse
import pydantic import pydantic
from bs4 import BeautifulSoup # type: ignore from bs4 import BeautifulSoup # type: ignore
@ -12,8 +11,8 @@ from app import activitypub as ap
from app.actor import LOCAL_ACTOR from app.actor import LOCAL_ACTOR
from app.actor import Actor from app.actor import Actor
from app.actor import RemoteActor from app.actor import RemoteActor
from app.config import PRIVACY_REPLACE
from app.media import proxied_media_url from app.media import proxied_media_url
from app.utils import privacy_replace
from app.utils.datetime import now from app.utils.datetime import now
from app.utils.datetime import parse_isoformat from app.utils.datetime import parse_isoformat
@ -179,20 +178,7 @@ class Object:
if self.ap_object.get("mediaType") == "text/markdown": if self.ap_object.get("mediaType") == "text/markdown":
content = markdown(content, extensions=["mdx_linkify"]) content = markdown(content, extensions=["mdx_linkify"])
if not PRIVACY_REPLACE: return privacy_replace.replace_content(content)
return content
soup = BeautifulSoup(content, "html5lib")
links = soup.find_all("a", href=True)
for link in links:
parsed_href = urlparse(link.attrs["href"])
if new_netloc := PRIVACY_REPLACE.get(
parsed_href.netloc.removeprefix("www.")
):
link.attrs["href"] = parsed_href._replace(netloc=new_netloc).geturl()
return soup.find("body").decode_contents()
@property @property
def summary(self) -> str | None: def summary(self) -> str | None:

View file

@ -32,6 +32,7 @@ from app.config import generate_csrf_token
from app.config import session_serializer from app.config import session_serializer
from app.database import AsyncSession from app.database import AsyncSession
from app.media import proxied_media_url from app.media import proxied_media_url
from app.utils import privacy_replace
from app.utils.datetime import now from app.utils.datetime import now
from app.utils.highlight import HIGHLIGHT_CSS from app.utils.highlight import HIGHLIGHT_CSS
from app.utils.highlight import highlight from app.utils.highlight import highlight
@ -400,3 +401,4 @@ _templates.env.filters["emojify"] = _emojify
_templates.env.filters["pluralize"] = _pluralize _templates.env.filters["pluralize"] = _pluralize
_templates.env.filters["parse_datetime"] = _parse_datetime _templates.env.filters["parse_datetime"] = _parse_datetime
_templates.env.filters["poll_item_pct"] = _poll_item_pct _templates.env.filters["poll_item_pct"] = _poll_item_pct
_templates.env.filters["privacy_replace_url"] = privacy_replace.replace_url

View file

@ -317,7 +317,7 @@
</div> </div>
{% endif %} {% endif %}
<div> <div>
<a href="{{ og_meta.url }}">{{ og_meta.title }}</a> <a href="{{ og_meta.url | privacy_replace_url }}">{{ og_meta.title }}</a>
{% if og_meta.site_name %} {% if og_meta.site_name %}
<small style="display:block;">{{ og_meta.site_name }}</small> <small style="display:block;">{{ og_meta.site_name }}</small>
{% endif %} {% endif %}

View file

@ -15,6 +15,7 @@ from app.database import AsyncSession
from app.models import InboxObject from app.models import InboxObject
from app.models import OutboxObject from app.models import OutboxObject
from app.utils.url import is_url_valid from app.utils.url import is_url_valid
from app.utils.url import make_abs
class OpenGraphMeta(BaseModel): class OpenGraphMeta(BaseModel):
@ -46,6 +47,10 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
if "title" not in raw: if "title" not in raw:
return None return None
for maybe_rel in {"url", "image"}:
if u := raw.get(maybe_rel):
raw[maybe_rel] = make_abs(u, url)
return OpenGraphMeta.parse_obj(raw) return OpenGraphMeta.parse_obj(raw)

View file

@ -43,6 +43,9 @@ To do so, just add as these extra config items, this is a sample config that rew
domain = "youtube.com" domain = "youtube.com"
replace_by ="yewtu.be" replace_by ="yewtu.be"
[[privacy_replace]] [[privacy_replace]]
domain = "youtu.be"
replace_by = "yewtu.be"
[[privacy_replace]]
domain = "twitter.com" domain = "twitter.com"
replace_by = "nitter.net" replace_by = "nitter.net"
[[privacy_replace]] [[privacy_replace]]