diff --git a/app/activitypub.py b/app/activitypub.py
index febab1c..61f1e8b 100644
--- a/app/activitypub.py
+++ b/app/activitypub.py
@@ -6,7 +6,6 @@ from typing import Any
import httpx
from loguru import logger
-from markdown import markdown
from app import config
from app.config import ALSO_KNOWN_AS
@@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE # noqa: F401
from app.config import MOVED_TO
from app.httpsig import auth
from app.key import get_pubkey_as_pem
+from app.source import dedup_tags
from app.source import hashtagify
from app.utils.url import check_url
@@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):
_LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
+# Build the actor's profile metadata: one PropertyValue entry per configured
+# key/value pair. Each value goes through hashtagify(), and the tags it
+# returns are added to the actor-level tag list.
+_LOCAL_ACTOR_METADATA = []
+# `or []` covers a falsy (unset or empty) metadata setting.
+for kv in config.CONFIG.metadata or []:
+    kv_value, kv_tags = hashtagify(kv.value)
+    # In-place extend of the list produced from the summary above.
+    _LOCAL_ACTOR_TAGS += kv_tags
+    _LOCAL_ACTOR_METADATA.append(
+        {"name": kv.key, "type": "PropertyValue", "value": kv_value}
+    )
+
ME = {
"@context": AS_EXTENDED_CTX,
@@ -113,7 +126,7 @@ ME = {
"outbox": config.BASE_URL + "/outbox",
"preferredUsername": config.USERNAME,
"name": config.CONFIG.name,
- "summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]),
+ "summary": _LOCAL_ACTOR_SUMMARY,
"endpoints": {
# For compat with servers expecting a sharedInbox...
"sharedInbox": config.BASE_URL
@@ -121,16 +134,7 @@ ME = {
},
"url": config.ID + "/", # XXX: the path is important for Mastodon compat
"manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
- "attachment": [
- {
- "name": kv.key,
- "type": "PropertyValue",
- "value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
- }
- for kv in config.CONFIG.metadata
- ]
- if config.CONFIG.metadata
- else [],
+ "attachment": _LOCAL_ACTOR_METADATA,
"icon": {
"mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
"type": "Image",
@@ -141,7 +145,7 @@ ME = {
"owner": config.ID,
"publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
},
- "tag": _LOCAL_ACTOR_TAGS,
+ "tag": dedup_tags(_LOCAL_ACTOR_TAGS),
}
if ALSO_KNOWN_AS:
diff --git a/app/source.py b/app/source.py
index 0dea35c..b411b04 100644
--- a/app/source.py
+++ b/app/source.py
@@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:
_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
-_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
+_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+_URL_REGEX = re.compile(
+ "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
+)
class AutoLink(SpanToken):
parse_inner = False
precedence = 10
- pattern = re.compile(
- "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
- )
+ pattern = _URL_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
@@ -38,7 +39,7 @@ class AutoLink(SpanToken):
class Mention(SpanToken):
parse_inner = False
precedence = 10
- pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+ pattern = _MENTION_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
@@ -47,7 +48,7 @@ class Mention(SpanToken):
class Hashtag(SpanToken):
parse_inner = False
precedence = 10
- pattern = re.compile(r"(#[\d\w]+)")
+ pattern = _HASHTAG_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
@@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):
def render_hashtag(self, token: Hashtag) -> str:
tag = token.target[1:]
- link = f'#{tag}' # noqa: E501
+ link = f'#{tag}' # noqa: E501
self.tags.append(
- dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag")
+ dict(
+ href=f"{BASE_URL}/t/{tag.lower()}",
+ name=token.target.lower(),
+ type="Hashtag",
+ )
)
return link
@@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
return actors
-def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]:
- # TODO: fix this, switch to mistletoe?
+def hashtagify(
+ content: str,
+) -> tuple[str, list[dict[str, str]]]:
tags = []
- hashtags = re.findall(_HASHTAG_REGEX, content)
- hashtags = sorted(set(hashtags), reverse=True) # unique tags, longest first
- for hashtag in hashtags:
- tag = hashtag[1:]
- link = f'#{tag}' # noqa: E501
- tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag"))
- content = content.replace(hashtag, link)
- return content, tags
+ with CustomRenderer(
+ mentioned_actors={},
+ enable_mentionify=False,
+ enable_hashtagify=True,
+ ) as renderer:
+ rendered_content = renderer.render(Document(content))
+ tags.extend(renderer.tags)
+
+ # Handle custom emoji
+ tags.extend(emoji.tags(content))
+
+ return rendered_content, tags
async def markdownify(
@@ -174,3 +184,17 @@ async def markdownify(
tags.extend(emoji.tags(content))
return rendered_content, tags, list(mentioned_actors.values())
+
+
+def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
+    """Return `tags` with duplicates dropped, preserving input order.
+
+    Tags sharing the same ("type", "name") pair are duplicates; only the
+    first one encountered is kept. The input list is not modified.
+    """
+    first_by_key: dict[tuple[str, str], dict[str, str]] = {}
+    for candidate in tags:
+        # setdefault stores a tag only when its key is new, so the first
+        # occurrence wins; dict insertion order keeps the original ordering.
+        first_by_key.setdefault((candidate["type"], candidate["name"]), candidate)
+    return list(first_by_key.values())