mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-12-22 05:04:27 +00:00
Improve privacy replace
This commit is contained in:
parent
6d2db24c33
commit
d378e17173
5 changed files with 13 additions and 17 deletions
|
@ -2,7 +2,6 @@ import hashlib
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
import pydantic
|
import pydantic
|
||||||
from bs4 import BeautifulSoup # type: ignore
|
from bs4 import BeautifulSoup # type: ignore
|
||||||
|
@ -12,8 +11,8 @@ from app import activitypub as ap
|
||||||
from app.actor import LOCAL_ACTOR
|
from app.actor import LOCAL_ACTOR
|
||||||
from app.actor import Actor
|
from app.actor import Actor
|
||||||
from app.actor import RemoteActor
|
from app.actor import RemoteActor
|
||||||
from app.config import PRIVACY_REPLACE
|
|
||||||
from app.media import proxied_media_url
|
from app.media import proxied_media_url
|
||||||
|
from app.utils import privacy_replace
|
||||||
from app.utils.datetime import now
|
from app.utils.datetime import now
|
||||||
from app.utils.datetime import parse_isoformat
|
from app.utils.datetime import parse_isoformat
|
||||||
|
|
||||||
|
@ -179,20 +178,7 @@ class Object:
|
||||||
if self.ap_object.get("mediaType") == "text/markdown":
|
if self.ap_object.get("mediaType") == "text/markdown":
|
||||||
content = markdown(content, extensions=["mdx_linkify"])
|
content = markdown(content, extensions=["mdx_linkify"])
|
||||||
|
|
||||||
if not PRIVACY_REPLACE:
|
return privacy_replace.replace_content(content)
|
||||||
return content
|
|
||||||
|
|
||||||
soup = BeautifulSoup(content, "html5lib")
|
|
||||||
links = soup.find_all("a", href=True)
|
|
||||||
|
|
||||||
for link in links:
|
|
||||||
parsed_href = urlparse(link.attrs["href"])
|
|
||||||
if new_netloc := PRIVACY_REPLACE.get(
|
|
||||||
parsed_href.netloc.removeprefix("www.")
|
|
||||||
):
|
|
||||||
link.attrs["href"] = parsed_href._replace(netloc=new_netloc).geturl()
|
|
||||||
|
|
||||||
return soup.find("body").decode_contents()
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def summary(self) -> str | None:
|
def summary(self) -> str | None:
|
||||||
|
|
|
@ -32,6 +32,7 @@ from app.config import generate_csrf_token
|
||||||
from app.config import session_serializer
|
from app.config import session_serializer
|
||||||
from app.database import AsyncSession
|
from app.database import AsyncSession
|
||||||
from app.media import proxied_media_url
|
from app.media import proxied_media_url
|
||||||
|
from app.utils import privacy_replace
|
||||||
from app.utils.datetime import now
|
from app.utils.datetime import now
|
||||||
from app.utils.highlight import HIGHLIGHT_CSS
|
from app.utils.highlight import HIGHLIGHT_CSS
|
||||||
from app.utils.highlight import highlight
|
from app.utils.highlight import highlight
|
||||||
|
@ -400,3 +401,4 @@ _templates.env.filters["emojify"] = _emojify
|
||||||
_templates.env.filters["pluralize"] = _pluralize
|
_templates.env.filters["pluralize"] = _pluralize
|
||||||
_templates.env.filters["parse_datetime"] = _parse_datetime
|
_templates.env.filters["parse_datetime"] = _parse_datetime
|
||||||
_templates.env.filters["poll_item_pct"] = _poll_item_pct
|
_templates.env.filters["poll_item_pct"] = _poll_item_pct
|
||||||
|
_templates.env.filters["privacy_replace_url"] = privacy_replace.replace_url
|
||||||
|
|
|
@ -317,7 +317,7 @@
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<a href="{{ og_meta.url }}">{{ og_meta.title }}</a>
|
<a href="{{ og_meta.url | privacy_replace_url }}">{{ og_meta.title }}</a>
|
||||||
{% if og_meta.site_name %}
|
{% if og_meta.site_name %}
|
||||||
<small style="display:block;">{{ og_meta.site_name }}</small>
|
<small style="display:block;">{{ og_meta.site_name }}</small>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
|
@ -15,6 +15,7 @@ from app.database import AsyncSession
|
||||||
from app.models import InboxObject
|
from app.models import InboxObject
|
||||||
from app.models import OutboxObject
|
from app.models import OutboxObject
|
||||||
from app.utils.url import is_url_valid
|
from app.utils.url import is_url_valid
|
||||||
|
from app.utils.url import make_abs
|
||||||
|
|
||||||
|
|
||||||
class OpenGraphMeta(BaseModel):
|
class OpenGraphMeta(BaseModel):
|
||||||
|
@ -46,6 +47,10 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
||||||
if "title" not in raw:
|
if "title" not in raw:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
for maybe_rel in {"url", "image"}:
|
||||||
|
if u := raw.get(maybe_rel):
|
||||||
|
raw[maybe_rel] = make_abs(u, url)
|
||||||
|
|
||||||
return OpenGraphMeta.parse_obj(raw)
|
return OpenGraphMeta.parse_obj(raw)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,9 @@ To do so, just add as these extra config items, this is a sample config that rew
|
||||||
domain = "youtube.com"
|
domain = "youtube.com"
|
||||||
replace_by ="yewtu.be"
|
replace_by ="yewtu.be"
|
||||||
[[privacy_replace]]
|
[[privacy_replace]]
|
||||||
|
domain = "youtu.be"
|
||||||
|
replace_by = "yewtu.be"
|
||||||
|
[[privacy_replace]]
|
||||||
domain = "twitter.com"
|
domain = "twitter.com"
|
||||||
replace_by = "nitter.net"
|
replace_by = "nitter.net"
|
||||||
[[privacy_replace]]
|
[[privacy_replace]]
|
||||||
|
|
Loading…
Reference in a new issue