mastodon-ld/masto_ld/patterns/tag.py

from typing import List, Optional
import pyparsing as pp
from bs4 import BeautifulSoup
from dataclasses import dataclass

# Sample post HTML: a heading paragraph, a paragraph holding an ``h-card``
# mention immediately followed by ``:wiki:scruffy``, and a body paragraph.
# Adjacent literals are concatenated at compile time, so the value is one
# single-line string.
EXAMPLE = (
    '<p># My New Post</p>'
    '<p><span class="h-card">'
    '<a href="https://masto.synthesis-infrastructures.wiki/@jonny" class="u-url mention">'
    '@<span>jonny</span></a></span>'
    ':wiki:scruffy</p>'
    '<p>Here is some new post where I talk about being real scruffy</p>'
)

@dataclass
class Namespaced_Tag:
    """
    A mention followed by colon-separated tags, e.g. ``@jonny:wiki:scruffy``.

    Instances are normally built with :meth:`from_html`, which looks for the
    pattern in the HTML of a post.
    """
    # Text content of the ``h-card`` mention element
    username: str
    # The colon-separated segments found directly after the mention, in order
    tags: List[str]

    @classmethod
    def from_html(cls, html: str) -> Optional['Namespaced_Tag']:
        """
        Parse the first namespaced tag out of a post's HTML.

        The first element with class ``h-card`` is treated as the mention.
        The text of the node immediately following it must start with ``':'``;
        everything after that leading colon is split on ``':'`` to give the
        tags.

        Args:
            html: HTML source of the post.

        Returns:
            A new instance of ``cls``, or ``None`` when there is no ``h-card``
            element, when nothing follows it, or when the text that follows
            it does not start with ``':'``.

        # FIXME: Just finds the first one for now
        """
        soup = BeautifulSoup(html, 'lxml')
        mention = soup.find(class_='h-card')
        if mention is None:
            return None

        # ``next_sibling`` is None when the mention is the last child of its
        # parent; there is nothing to read tags from in that case.
        following = mention.next_sibling
        if following is None:
            return None

        # FIXME: super weak check, just check that the next text starts with a ':'
        # An empty string does not start with ':' either, so a mention followed
        # by an empty element is rejected rather than yielding an empty tag list.
        tag_text = following.text
        if not tag_text.startswith(':'):
            return None

        # Drop the leading ':' before splitting so the first tag is not ''.
        # Build via ``cls`` so subclasses get instances of their own type.
        return cls(username=mention.text, tags=tag_text[1:].split(':'))