More Markdown improvements

This commit is contained in:
Thomas Sileo 2022-10-05 20:05:16 +02:00
parent 881d0ad899
commit 5eaa0f291b
2 changed files with 59 additions and 31 deletions

View file

@ -6,7 +6,6 @@ from typing import Any
import httpx import httpx
from loguru import logger from loguru import logger
from markdown import markdown
from app import config from app import config
from app.config import ALSO_KNOWN_AS from app.config import ALSO_KNOWN_AS
@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE # noqa: F401
from app.config import MOVED_TO from app.config import MOVED_TO
from app.httpsig import auth from app.httpsig import auth
from app.key import get_pubkey_as_pem from app.key import get_pubkey_as_pem
from app.source import dedup_tags
from app.source import hashtagify from app.source import hashtagify
from app.utils.url import check_url from app.utils.url import check_url
@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):
_LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary) _LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
# Profile metadata entries for the local actor, built once at import time and
# used as the actor's "attachment" list. Each configured value goes through
# hashtagify() so the stored value is the rendered content and any tags found
# in it are collected.
_LOCAL_ACTOR_METADATA = []
if config.CONFIG.metadata:
    for kv in config.CONFIG.metadata:
        kv_value, kv_tags = hashtagify(kv.value)
        _LOCAL_ACTOR_METADATA.append(
            {
                "name": kv.key,
                "type": "PropertyValue",
                "value": kv_value,
            }
        )
        # Tags from metadata values are appended to the ones extracted from
        # the summary; the same tag may therefore appear more than once here.
        _LOCAL_ACTOR_TAGS.extend(kv_tags)
ME = { ME = {
"@context": AS_EXTENDED_CTX, "@context": AS_EXTENDED_CTX,
@ -113,7 +126,7 @@ ME = {
"outbox": config.BASE_URL + "/outbox", "outbox": config.BASE_URL + "/outbox",
"preferredUsername": config.USERNAME, "preferredUsername": config.USERNAME,
"name": config.CONFIG.name, "name": config.CONFIG.name,
"summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]), "summary": _LOCAL_ACTOR_SUMMARY,
"endpoints": { "endpoints": {
# For compat with servers expecting a sharedInbox... # For compat with servers expecting a sharedInbox...
"sharedInbox": config.BASE_URL "sharedInbox": config.BASE_URL
@ -121,16 +134,7 @@ ME = {
}, },
"url": config.ID + "/", # XXX: the path is important for Mastodon compat "url": config.ID + "/", # XXX: the path is important for Mastodon compat
"manuallyApprovesFollowers": config.CONFIG.manually_approves_followers, "manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
"attachment": [ "attachment": _LOCAL_ACTOR_METADATA,
{
"name": kv.key,
"type": "PropertyValue",
"value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
}
for kv in config.CONFIG.metadata
]
if config.CONFIG.metadata
else [],
"icon": { "icon": {
"mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0], "mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
"type": "Image", "type": "Image",
@ -141,7 +145,7 @@ ME = {
"owner": config.ID, "owner": config.ID,
"publicKeyPem": get_pubkey_as_pem(config.KEY_PATH), "publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
}, },
"tag": _LOCAL_ACTOR_TAGS, "tag": dedup_tags(_LOCAL_ACTOR_TAGS),
} }
if ALSO_KNOWN_AS: if ALSO_KNOWN_AS:

View file

@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:
_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME) _FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)") _HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+") _MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
_URL_REGEX = re.compile(
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
class AutoLink(SpanToken): class AutoLink(SpanToken):
parse_inner = False parse_inner = False
precedence = 10 precedence = 10
pattern = re.compile( pattern = _URL_REGEX
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
def __init__(self, match_obj: re.Match) -> None: def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group() self.target = match_obj.group()
@ -38,7 +39,7 @@ class AutoLink(SpanToken):
class Mention(SpanToken): class Mention(SpanToken):
parse_inner = False parse_inner = False
precedence = 10 precedence = 10
pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)") pattern = _MENTION_REGEX
def __init__(self, match_obj: re.Match) -> None: def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group() self.target = match_obj.group()
@ -47,7 +48,7 @@ class Mention(SpanToken):
class Hashtag(SpanToken): class Hashtag(SpanToken):
parse_inner = False parse_inner = False
precedence = 10 precedence = 10
pattern = re.compile(r"(#[\d\w]+)") pattern = _HASHTAG_REGEX
def __init__(self, match_obj: re.Match) -> None: def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group() self.target = match_obj.group()
@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):
def render_hashtag(self, token: Hashtag) -> str: def render_hashtag(self, token: Hashtag) -> str:
tag = token.target[1:] tag = token.target[1:]
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501 link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
self.tags.append( self.tags.append(
dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag") dict(
href=f"{BASE_URL}/t/{tag.lower()}",
name=token.target.lower(),
type="Hashtag",
)
) )
return link return link
@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
return actors return actors
def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]: def hashtagify(
# TODO: fix this, switch to mistletoe? content: str,
) -> tuple[str, list[dict[str, str]]]:
tags = [] tags = []
hashtags = re.findall(_HASHTAG_REGEX, content) with CustomRenderer(
hashtags = sorted(set(hashtags), reverse=True) # unique tags, longest first mentioned_actors={},
for hashtag in hashtags: enable_mentionify=False,
tag = hashtag[1:] enable_hashtagify=True,
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501 ) as renderer:
tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag")) rendered_content = renderer.render(Document(content))
content = content.replace(hashtag, link) tags.extend(renderer.tags)
return content, tags
# Handle custom emoji
tags.extend(emoji.tags(content))
return rendered_content, tags
async def markdownify( async def markdownify(
@ -174,3 +184,17 @@ async def markdownify(
tags.extend(emoji.tags(content)) tags.extend(emoji.tags(content))
return rendered_content, tags, list(mentioned_actors.values()) return rendered_content, tags, list(mentioned_actors.values())
def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
    """Return *tags* with duplicates removed, keeping first occurrences.

    Two tags are considered duplicates when both their ``"type"`` and
    ``"name"`` values are equal; any other keys are ignored for the
    comparison. The input list is not modified and the returned list holds
    the original dict objects in their original relative order.

    Raises:
        KeyError: if a tag has no ``"type"`` or ``"name"`` key.
    """
    seen = set()
    deduped_tags = []
    for tag in tags:
        tag_key = (tag["type"], tag["name"])
        if tag_key in seen:
            continue

        seen.add(tag_key)
        deduped_tags.append(tag)

    return deduped_tags