32 lines
1.0 KiB
Python
32 lines
1.0 KiB
Python
from typing import List, Optional
|
|
import pyparsing as pp
|
|
from bs4 import BeautifulSoup
|
|
from dataclasses import dataclass
|
|
|
|
# Sample post HTML: a heading paragraph, then a paragraph holding an
# ``h-card`` mention of ``@jonny`` immediately followed by the text
# ``:wiki:scruffy``, then a paragraph of body text.
EXAMPLE = '<p># My New Post</p><p><span class="h-card"><a href="https://masto.synthesis-infrastructures.wiki/@jonny" class="u-url mention">@<span>jonny</span></a></span>:wiki:scruffy</p><p>Here is some new post where I talk about being real scruffy</p>'
|
|
|
|
@dataclass
class Namespaced_Tag:
    """A user mention followed by a colon-delimited list of tags.

    For example, an ``h-card`` mention rendering as ``@jonny`` that is
    directly followed by the text ``:wiki:scruffy`` is represented as
    ``Namespaced_Tag(username='@jonny', tags=['wiki', 'scruffy'])``.
    """

    # Text content of the mention element (e.g. ``'@jonny'``).
    username: str
    # Tag names in the order they appear after the mention.
    tags: List[str]

    @classmethod
    def from_html(cls, html: str) -> Optional['Namespaced_Tag']:
        """Build an instance from the first ``h-card`` mention in ``html``.

        FIXME: Just finds the first one for now.

        Args:
            html: HTML markup to search.

        Returns:
            A new instance when an element with class ``h-card`` is found
            and the node right after it has text starting with ``':'``;
            otherwise ``None``.
        """
        soup = BeautifulSoup(html, 'lxml')
        mention = soup.find(class_='h-card')
        if mention is None:
            return None

        # The mention may be the last node in its parent, in which case
        # there is nothing after it that could carry tags.
        following = mention.next_sibling
        if following is None:
            return None

        # FIXME: super weak check, just check that the next text starts
        # with a ':'. Using startswith (rather than inspecting the first
        # split field) also rejects empty text, which would otherwise
        # produce an instance with no tags.
        suffix = following.text
        if not suffix.startswith(':'):
            return None

        # The first split field is the empty string before the leading ':'.
        tags = suffix.split(':')[1:]
        # Use cls so subclasses get instances of their own type.
        return cls(username=mention.text, tags=tags)
|
|
|
|
|
|
|