Working, but extremely slow and non-async, feed grabber. Why doesn't sqlmodel have a freaking upsert lmao
This commit is contained in:
parent
d832e3a93e
commit
d274658bca
17 changed files with 667 additions and 413 deletions
|
@ -0,0 +1,2 @@
|
|||
|
||||
|
|
@ -1,12 +1,13 @@
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from pydantic import BaseSettings, AnyHttpUrl, EmailStr
|
||||
from pydantic import AnyHttpUrl, EmailStr, Field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
class Config(BaseSettings):
|
||||
MASTO_URL:AnyHttpUrl
|
||||
MASTO_URL:str
|
||||
MASTO_TOKEN: Optional[str] = None
|
||||
LOGDIR:Path = Path().home() / '.diyalgo'
|
||||
DB:Optional[Path] = Path().home() / '.diyalgo' / 'diyalgo.db'
|
||||
DB: Optional[Path] = Field(default=Path().home() / '.diyalgo' / 'diyalgo.db')
|
||||
"""
|
||||
Optional, if set to ``None`` , use the in-memory sqlite DB
|
||||
"""
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import List, Literal
|
||||
from typing import List, Literal, Generator, Optional
|
||||
import pdb
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from mastodon import Mastodon
|
||||
|
||||
|
@ -7,13 +8,39 @@ from diyalgo.models import Status
|
|||
|
||||
# Timeline names accepted by the ``timeline`` argument of ``fetch_timeline``.
TIMELINES = Literal['home', 'local', 'public', 'tag', 'hashtag', 'list', 'id']
|
||||
|
||||
|
||||
def fetch_timeline(
    client: Mastodon,
    timeline: TIMELINES = "public",
    after: Optional[datetime] = None,
    **kwargs
) -> Generator[List[Status], None, None]:
    """
    Lazily fetch a timeline one page at a time, newest page first.

    Each page returned by the client is converted with ``pack_statuses`` and
    yielded as a list. Paging stops as soon as the last entry of a page was
    created at or before ``after``, or when the server has no further pages.

    Args:
        client: Mastodon API client used to request the pages.
        timeline: Which timeline to read.
        after: Cutoff timestamp. ``None`` (the default) means "24 hours before
            this call"; the cutoff is computed when the generator starts
            rather than at import time, so long-running processes do not
            work with a stale window.
        **kwargs: Forwarded unchanged to ``client.timeline``.

    Yields:
        One list of ``Status`` objects per fetched page.
    """
    if after is None:
        after = datetime.now(timezone.utc) - timedelta(days=1)

    page = client.timeline(timeline=timeline, **kwargs)

    # An empty (or missing) page means there is nothing more to read. Ending
    # the generator by falling out of the loop / ``return`` is required:
    # raising StopIteration inside a generator is turned into RuntimeError.
    while page:
        yield pack_statuses(page)

        # NOTE(review): assumes 'created_at' is a timezone-aware datetime
        # comparable with ``after`` -- confirm against the client in use.
        if page[-1]['created_at'] <= after:
            return

        page = client.fetch_next(page)
|
||||
|
||||
def pack_statuses(statuses: list[dict]) -> list[Status]:
    """
    Convert raw status dicts into ``Status`` models, de-duplicated by id.

    A status that boosts another one is emitted *after* the boosted status,
    and its ``reblog`` attribute points at that boosted ``Status``. When the
    boosted status was already packed earlier in the same call, the existing
    instance is reused so that a single id is never represented by two
    different objects.

    The input dicts are not modified.

    Args:
        statuses: Raw status dicts; each must have an ``'id'`` key and may
            carry a nested status dict under ``'reblog'``.

    Returns:
        The packed statuses in first-seen order, boosted statuses preceding
        the statuses that boost them.
    """
    # Insertion-ordered id -> Status map: constant-time "already seen?" checks
    # while still remembering the order in which statuses were packed.
    packed: dict = {}

    for raw in statuses:
        if raw['id'] in packed:
            continue

        reblog = None
        reblog_data = raw.get('reblog')
        if reblog_data:
            reblog = Status(**reblog_data)
            # Keep the first instance seen for this id and link to that one.
            reblog = packed.setdefault(reblog.id, reblog)

        # Build the model without the nested dict instead of deleting the key
        # from the caller's data; the relationship is attached explicitly.
        fields = {key: value for key, value in raw.items() if key != 'reblog'}
        status = Status(**fields)
        status.reblog = reblog
        packed[status.id] = status

    return list(packed.values())
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ class Account(SQLModel, table=True):
|
|||
statuses: List['Status'] = Relationship(back_populates='account')
|
||||
statuses_count: int
|
||||
suspended: Optional[bool] = None
|
||||
url: AnyHttpUrl
|
||||
url: str
|
||||
username: str
|
||||
|
||||
# class Config:
|
||||
|
|
|
@ -6,11 +6,11 @@ if TYPE_CHECKING:
|
|||
|
||||
class MediaAttachment(SQLModel, table=True):
|
||||
id: int = Field(primary_key=True)
|
||||
blurhash: str
|
||||
description: str
|
||||
blurhash: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
# meta: dict
|
||||
preview_url: str
|
||||
remote_url: str
|
||||
preview_url: Optional[str] = None
|
||||
remote_url: Optional[str] = None
|
||||
type: str #Literal['unknown', 'image', 'gifv', 'video', 'audio']
|
||||
url: str
|
||||
status_id: Optional[int] = Field(default=None, foreign_key='status.id')
|
||||
|
|
|
@ -13,7 +13,7 @@ class CustomEmoji(SQLModel, table=True):
|
|||
url: str
|
||||
static_url: str
|
||||
visible_in_picker: bool
|
||||
category: str
|
||||
category: Optional[str] = None
|
||||
|
||||
accounts: List['Account'] = Relationship(back_populates='emojis', link_model=EmojiAccountLink)
|
||||
statuses: List['Status'] = Relationship(back_populates='emojis', link_model=EmojiStatusLink)
|
|
@ -18,8 +18,8 @@ class Poll(SQLModel, table=True):
|
|||
expires_at: Optional[datetime] = None
|
||||
expired: bool
|
||||
multiple: bool
|
||||
options: List[PollOption] = Relationship(back_populates='poll')
|
||||
own_votes: List[int] = Field(default_factory=list)
|
||||
options: list["PollOption"] = Relationship(back_populates='poll')
|
||||
#own_votes: list[int] = Field(default_factory=list)
|
||||
voted: Optional[bool] = None
|
||||
votes_count: int
|
||||
voters_count: Optional[int] = None
|
||||
|
|
|
@ -24,6 +24,7 @@ class Status(SQLModel, table=True):
|
|||
|
||||
See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts
|
||||
"""
|
||||
|
||||
id: int = Field(primary_key=True)
|
||||
# application: Optional[dict] = None
|
||||
account_id: Optional[int] = Field(default=None, foreign_key='account.id')
|
||||
|
@ -44,7 +45,18 @@ class Status(SQLModel, table=True):
|
|||
muted: Optional[bool] = None
|
||||
pinned: Optional[bool] = None
|
||||
# poll: Optional['Poll'] = Relationship(back_populates='status')
|
||||
reblog: Optional[bool] = None
|
||||
reblog_id: Optional[int] = Field(
|
||||
foreign_key='status.id',
|
||||
default=None,
|
||||
nullable=True
|
||||
)
|
||||
reblog: Optional['Status'] = Relationship(
|
||||
back_populates='reblogged_by',
|
||||
sa_relationship_kwargs = {
|
||||
'remote_side': 'Status.id'
|
||||
}
|
||||
)
|
||||
reblogged_by: Optional[List['Status']] = Relationship(back_populates='reblog')
|
||||
reblogged: Optional[bool] = None
|
||||
reblogs_count: int
|
||||
replies_count: int
|
||||
|
@ -53,7 +65,7 @@ class Status(SQLModel, table=True):
|
|||
tags: List['Tag'] = Relationship(back_populates='statuses', link_model=TagStatusLink)
|
||||
text: Optional[str] = None
|
||||
uri: str
|
||||
url: str
|
||||
url: Optional[str] = None
|
||||
visibility: str #Literal['public', 'unlisted', 'private', 'direct']
|
||||
|
||||
@property
|
||||
|
|
0
diyalgo/workers/__init__.py
Normal file
0
diyalgo/workers/__init__.py
Normal file
27
diyalgo/workers/timeline.py
Normal file
27
diyalgo/workers/timeline.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
import pdb
|
||||
|
||||
from mastodon import Mastodon
|
||||
from sqlmodel import Session, select
|
||||
from tqdm.asyncio import tqdm
|
||||
|
||||
from diyalgo.expansions.timeline import fetch_timeline
|
||||
from diyalgo.models.status import Status
|
||||
|
||||
def _find_status(session: Session, status_id):
    """Return the stored ``Status`` with primary key ``status_id``, or ``None``."""
    statement = select(Status).where(Status.id == status_id)
    return session.exec(statement).first()


def populate_timeline(
    client: Mastodon,
    session: Session,
    *,
    timeline: str = 'public',
    **kwargs
):
    """
    Fetch a timeline page by page and store statuses that are not yet in the DB.

    For every fetched status, a boosted status that is already stored replaces
    the freshly built one on ``post.reblog``, and the status itself is added to
    the session only when no row with its id exists. The session is committed
    once per fetched page.

    Args:
        client: Mastodon API client handed to ``fetch_timeline``.
        session: Database session used for lookups, inserts and commits.
        timeline: Timeline to read; defaults to ``'public'``. It is a named
            parameter so that callers can select another timeline without
            colliding with the positional argument passed to
            ``fetch_timeline``.
        **kwargs: Forwarded unchanged to ``fetch_timeline``.
    """
    for posts in fetch_timeline(client, timeline, **kwargs):
        for post in posts:
            if post.reblog is not None:
                stored_reblog = _find_status(session, post.reblog.id)
                if stored_reblog is not None:
                    # Link to the persisted row rather than a duplicate object.
                    post.reblog = stored_reblog

            if _find_status(session, post.id) is None:
                session.add(post)

        session.commit()
|
943
poetry.lock
generated
943
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -10,13 +10,15 @@ keywords = ["mastodon", "fediverse", "algorithm", "algorithms", "social media"]
|
|||
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
python = "^3.11"
|
||||
"Mastodon.py" = "^1.8.0"
|
||||
pydantic = "^1.10.4"
|
||||
sqlmodel = "^0.0.8"
|
||||
pydantic = ">=2.0.0"
|
||||
sqlmodel = ">=0.0.14"
|
||||
beautifulsoup4 = "^4.11.1"
|
||||
lxml = "^4.9.2"
|
||||
python-dotenv = "^0.21.0"
|
||||
python-dotenv = ">=1.0.0"
|
||||
pydantic-settings = "^2.1.0"
|
||||
tqdm = "^4.66.1"
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
|
3
tests/fixtures/__init__.py
vendored
3
tests/fixtures/__init__.py
vendored
|
@ -0,0 +1,3 @@
|
|||
from .db import session_fixture, engine_fixture
|
||||
from .client import client_fixture
|
||||
from .config import config_fixture
|
0
tests/test_expansions/__init__.py
Normal file
0
tests/test_expansions/__init__.py
Normal file
7
tests/test_expansions/test_timeline.py
Normal file
7
tests/test_expansions/test_timeline.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
from ..fixtures.client import client_fixture
|
||||
|
||||
from diyalgo.expansions.timeline import fetch_timeline
|
||||
|
||||
def test_fetch_public_timeline(client_fixture):
    """Smoke test: the first page of the public timeline can be fetched."""
    pages = fetch_timeline(client_fixture, 'public')
    first_page = next(pages)
|
0
tests/test_workers/__init__.py
Normal file
0
tests/test_workers/__init__.py
Normal file
8
tests/test_workers/test_timeline.py
Normal file
8
tests/test_workers/test_timeline.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
import asyncio
|
||||
|
||||
from ..fixtures import session_fixture, engine_fixture, client_fixture, config_fixture
|
||||
|
||||
from diyalgo.workers.timeline import populate_timeline
|
||||
|
||||
def test_populate_timeline(session_fixture, client_fixture):
    """Smoke test: populating the database from the timeline runs to completion."""
    populate_timeline(client=client_fixture, session=session_fixture)
|
Loading…
Reference in a new issue