mastodon-ld/masto_ld/patterns/tag.py

32 lines
1.0 KiB
Python

from typing import List, Optional
import pyparsing as pp
from bs4 import BeautifulSoup
from dataclasses import dataclass
# Sample post HTML: a heading paragraph, an h-card mention immediately
# followed by ':wiki:scruffy', and a body paragraph.
EXAMPLE = (
    '<p># My New Post</p>'
    '<p><span class="h-card">'
    '<a href="https://masto.synthesis-infrastructures.wiki/@jonny" class="u-url mention">'
    '@<span>jonny</span></a></span>'
    ':wiki:scruffy</p>'
    '<p>Here is some new post where I talk about being real scruffy</p>'
)
@dataclass
class Namespaced_Tag:
    """
    A user mention that is directly followed by a colon-delimited list of
    tags in a post's HTML, e.g. a mention of ``@jonny`` followed by
    ``:wiki:scruffy``.
    """
    # Text content of the ``h-card`` mention element, exactly as rendered
    username: str
    # Colon-separated segments that follow the mention, in document order
    tags: List[str]

    @classmethod
    def from_html(cls, html: str) -> Optional['Namespaced_Tag']:
        """
        Build a tag from the first ``h-card`` mention found in ``html``.

        # FIXME: Just finds the first one for now

        Args:
            html: HTML source of a post.

        Returns:
            A new instance when the first ``h-card`` element is followed by
            a sibling whose text begins with ``':'``; ``None`` when there is
            no ``h-card`` element, when it has no following sibling, or when
            the sibling's text does not begin with ``':'``.
        """
        soup = BeautifulSoup(html, 'lxml')
        mention = soup.find(class_='h-card')
        if mention is None:
            return None

        # The mention may be the last node in its parent, in which case
        # there is nothing after it that could hold tags.
        trailing = mention.next_sibling
        if trailing is None:
            return None

        segments = trailing.text.split(':')
        # FIXME: super weak check, just check that the next text starts with a ':'
        # (splitting on ':' yields an empty first segment in that case)
        if segments[0] != '':
            return None

        # Use ``cls`` so subclasses get instances of their own type.
        return cls(username=mention.text, tags=segments[1:])