working, but extremely slow and non-async feed grabber. why doesn't sqlmodel have freaking upsert lmao
This commit is contained in:
parent
d832e3a93e
commit
d274658bca
|
@ -0,0 +1,2 @@
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pydantic import BaseSettings, AnyHttpUrl, EmailStr
|
from pydantic import AnyHttpUrl, EmailStr, Field
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
class Config(BaseSettings):
|
class Config(BaseSettings):
|
||||||
MASTO_URL:AnyHttpUrl
|
MASTO_URL:str
|
||||||
MASTO_TOKEN: Optional[str] = None
|
MASTO_TOKEN: Optional[str] = None
|
||||||
LOGDIR:Path = Path().home() / '.diyalgo'
|
LOGDIR:Path = Path().home() / '.diyalgo'
|
||||||
DB:Optional[Path] = Path().home() / '.diyalgo' / 'diyalgo.db'
|
DB: Optional[Path] = Field(default=Path().home() / '.diyalgo' / 'diyalgo.db')
|
||||||
"""
|
"""
|
||||||
Optional, if set to ``None`` , use the in-memory sqlite DB
|
Optional, if set to ``None`` , use the in-memory sqlite DB
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from typing import List, Literal
|
from typing import List, Literal, Generator, Optional
|
||||||
import pdb
|
import pdb
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
|
|
||||||
|
@ -7,13 +8,39 @@ from diyalgo.models import Status
|
||||||
|
|
||||||
TIMELINES = Literal['home', 'local', 'public', 'tag', 'hashtag', 'list', 'id']
|
TIMELINES = Literal['home', 'local', 'public', 'tag', 'hashtag', 'list', 'id']
|
||||||
|
|
||||||
|
|
||||||
def fetch_timeline(
    client: Mastodon,
    timeline: TIMELINES = "public",
    after: Optional[datetime] = None,
    **kwargs
) -> Generator[List[Status], None, None]:
    """
    Lazily fetch a timeline one page at a time, newest page first.

    The first page is always yielded. Further pages are requested with
    ``client.fetch_next`` for as long as the last status of the current
    page was created after ``after``.

    Args:
        client: Mastodon API client used for all requests.
        timeline: Name of the timeline to fetch.
        after: Stop paging once the last status of a page is not newer
            than this. If ``None`` (the default), one day before the time
            of the call is used.
            NOTE(review): compared directly against each status'
            ``created_at``; presumably both are timezone-aware - confirm.
        **kwargs: Forwarded unchanged to ``client.timeline``.

    Yields:
        One list of packed ``Status`` objects per fetched page.
    """
    # Resolve the default per call: a default expression in the signature is
    # evaluated only once, at import time, and would go stale in a
    # long-running process.
    if after is None:
        after = datetime.now(timezone.utc) - timedelta(days=1)

    page = client.timeline(timeline=timeline, **kwargs)
    yield pack_statuses(page)

    # ``page and ...`` guards against indexing into an empty page.
    while page and page[-1]['created_at'] > after:
        page = client.fetch_next(page)
        if not page:
            # No further pages. A plain ``return`` ends the generator;
            # raising StopIteration inside a generator body is converted
            # into RuntimeError (PEP 479).
            return
        yield pack_statuses(page)
|
||||||
|
|
||||||
|
def pack_statuses(statuses: list[dict]) -> list[Status]:
    """
    Convert raw status dicts into ``Status`` models, dropping duplicates.

    A status that carries a ``reblog`` has the reblogged status packed as
    well: it is placed in the result ahead of the status that reblogs it
    and linked through ``status.reblog``. Each id appears at most once in
    the result, and a reblog that was already packed is reused instead of
    being constructed a second time.

    The input dicts are not modified.

    Args:
        statuses: Raw status dicts; each needs an ``'id'`` key and may
            have a ``'reblog'`` key holding a nested status dict.

    Returns:
        Unique ``Status`` objects in first-seen order.
    """
    # id -> Status. A dict gives constant-time membership checks and keeps
    # insertion order, so it doubles as the ordered output.
    packed: dict = {}

    for raw in statuses:
        if raw['id'] in packed:
            continue

        # Work on a shallow copy so the caller's dicts keep their 'reblog'.
        fields = dict(raw)
        raw_reblog = fields.pop('reblog', None)

        reblog = None
        if raw_reblog:
            reblog = packed.get(raw_reblog['id'])
            if reblog is None:
                reblog = Status(**raw_reblog)
                packed[reblog.id] = reblog

        status = Status(**fields)
        status.reblog = reblog
        packed[status.id] = status

    return list(packed.values())
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@ class Account(SQLModel, table=True):
|
||||||
statuses: List['Status'] = Relationship(back_populates='account')
|
statuses: List['Status'] = Relationship(back_populates='account')
|
||||||
statuses_count: int
|
statuses_count: int
|
||||||
suspended: Optional[bool] = None
|
suspended: Optional[bool] = None
|
||||||
url: AnyHttpUrl
|
url: str
|
||||||
username: str
|
username: str
|
||||||
|
|
||||||
# class Config:
|
# class Config:
|
||||||
|
|
|
@ -6,11 +6,11 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
class MediaAttachment(SQLModel, table=True):
|
class MediaAttachment(SQLModel, table=True):
|
||||||
id: int = Field(primary_key=True)
|
id: int = Field(primary_key=True)
|
||||||
blurhash: str
|
blurhash: Optional[str] = None
|
||||||
description: str
|
description: Optional[str] = None
|
||||||
# meta: dict
|
# meta: dict
|
||||||
preview_url: str
|
preview_url: Optional[str] = None
|
||||||
remote_url: str
|
remote_url: Optional[str] = None
|
||||||
type: str #Literal['unknown', 'image', 'gifv', 'video', 'audio']
|
type: str #Literal['unknown', 'image', 'gifv', 'video', 'audio']
|
||||||
url: str
|
url: str
|
||||||
status_id: Optional[int] = Field(default=None, foreign_key='status.id')
|
status_id: Optional[int] = Field(default=None, foreign_key='status.id')
|
||||||
|
|
|
@ -13,7 +13,7 @@ class CustomEmoji(SQLModel, table=True):
|
||||||
url: str
|
url: str
|
||||||
static_url: str
|
static_url: str
|
||||||
visible_in_picker: bool
|
visible_in_picker: bool
|
||||||
category: str
|
category: Optional[str] = None
|
||||||
|
|
||||||
accounts: List['Account'] = Relationship(back_populates='emojis', link_model=EmojiAccountLink)
|
accounts: List['Account'] = Relationship(back_populates='emojis', link_model=EmojiAccountLink)
|
||||||
statuses: List['Status'] = Relationship(back_populates='emojis', link_model=EmojiStatusLink)
|
statuses: List['Status'] = Relationship(back_populates='emojis', link_model=EmojiStatusLink)
|
|
@ -18,8 +18,8 @@ class Poll(SQLModel, table=True):
|
||||||
expires_at: Optional[datetime] = None
|
expires_at: Optional[datetime] = None
|
||||||
expired: bool
|
expired: bool
|
||||||
multiple: bool
|
multiple: bool
|
||||||
options: List[PollOption] = Relationship(back_populates='poll')
|
options: list["PollOption"] = Relationship(back_populates='poll')
|
||||||
own_votes: List[int] = Field(default_factory=list)
|
#own_votes: list[int] = Field(default_factory=list)
|
||||||
voted: Optional[bool] = None
|
voted: Optional[bool] = None
|
||||||
votes_count: int
|
votes_count: int
|
||||||
voters_count: Optional[int] = None
|
voters_count: Optional[int] = None
|
||||||
|
|
|
@ -24,6 +24,7 @@ class Status(SQLModel, table=True):
|
||||||
|
|
||||||
See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts
|
See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts
|
||||||
"""
|
"""
|
||||||
|
|
||||||
id: int = Field(primary_key=True)
|
id: int = Field(primary_key=True)
|
||||||
# application: Optional[dict] = None
|
# application: Optional[dict] = None
|
||||||
account_id: Optional[int] = Field(default=None, foreign_key='account.id')
|
account_id: Optional[int] = Field(default=None, foreign_key='account.id')
|
||||||
|
@ -44,7 +45,18 @@ class Status(SQLModel, table=True):
|
||||||
muted: Optional[bool] = None
|
muted: Optional[bool] = None
|
||||||
pinned: Optional[bool] = None
|
pinned: Optional[bool] = None
|
||||||
# poll: Optional['Poll'] = Relationship(back_populates='status')
|
# poll: Optional['Poll'] = Relationship(back_populates='status')
|
||||||
reblog: Optional[bool] = None
|
reblog_id: Optional[int] = Field(
|
||||||
|
foreign_key='status.id',
|
||||||
|
default=None,
|
||||||
|
nullable=True
|
||||||
|
)
|
||||||
|
reblog: Optional['Status'] = Relationship(
|
||||||
|
back_populates='reblogged_by',
|
||||||
|
sa_relationship_kwargs = {
|
||||||
|
'remote_side': 'Status.id'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
reblogged_by: Optional[List['Status']] = Relationship(back_populates='reblog')
|
||||||
reblogged: Optional[bool] = None
|
reblogged: Optional[bool] = None
|
||||||
reblogs_count: int
|
reblogs_count: int
|
||||||
replies_count: int
|
replies_count: int
|
||||||
|
@ -53,7 +65,7 @@ class Status(SQLModel, table=True):
|
||||||
tags: List['Tag'] = Relationship(back_populates='statuses', link_model=TagStatusLink)
|
tags: List['Tag'] = Relationship(back_populates='statuses', link_model=TagStatusLink)
|
||||||
text: Optional[str] = None
|
text: Optional[str] = None
|
||||||
uri: str
|
uri: str
|
||||||
url: str
|
url: Optional[str] = None
|
||||||
visibility: str #Literal['public', 'unlisted', 'private', 'direct']
|
visibility: str #Literal['public', 'unlisted', 'private', 'direct']
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
import pdb
|
||||||
|
|
||||||
|
from mastodon import Mastodon
|
||||||
|
from sqlmodel import Session, select
|
||||||
|
from tqdm.asyncio import tqdm
|
||||||
|
|
||||||
|
from diyalgo.expansions.timeline import fetch_timeline
|
||||||
|
from diyalgo.models.status import Status
|
||||||
|
|
||||||
|
def populate_timeline(
    client: Mastodon,
    session: Session,
    timeline: str = 'public',
    **kwargs
):
    """
    Fetch a timeline and store every status that is not in the DB yet.

    Statuses are looked up by primary key; ones that already exist are left
    untouched (this is an insert-if-missing, not an upsert). When a fetched
    status reblogs a status that is already stored, the stored instance is
    attached in place of the freshly built one.

    Args:
        client: Mastodon API client handed to ``fetch_timeline``.
        session: Database session used for lookups and inserts.
        timeline: Name of the timeline to fetch, ``'public'`` by default.
        **kwargs: Forwarded unchanged to ``fetch_timeline``.
    """
    for posts in fetch_timeline(client, timeline, **kwargs):
        for post in posts:
            if post.reblog is not None:
                # Point at the stored row, if any, rather than a second
                # object carrying the same primary key.
                existing_reblog = session.get(Status, post.reblog.id)
                if existing_reblog is not None:
                    post.reblog = existing_reblog

            if session.get(Status, post.id) is None:
                session.add(post)

        # One commit per fetched page.
        session.commit()
|
File diff suppressed because it is too large
Load Diff
|
@ -10,13 +10,15 @@ keywords = ["mastodon", "fediverse", "algorithm", "algorithms", "social media"]
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.9"
|
python = "^3.11"
|
||||||
"Mastodon.py" = "^1.8.0"
|
"Mastodon.py" = "^1.8.0"
|
||||||
pydantic = "^1.10.4"
|
pydantic = ">=2.0.0"
|
||||||
sqlmodel = "^0.0.8"
|
sqlmodel = ">=0.0.14"
|
||||||
beautifulsoup4 = "^4.11.1"
|
beautifulsoup4 = "^4.11.1"
|
||||||
lxml = "^4.9.2"
|
lxml = "^4.9.2"
|
||||||
python-dotenv = "^0.21.0"
|
python-dotenv = ">=1.0.0"
|
||||||
|
pydantic-settings = "^2.1.0"
|
||||||
|
tqdm = "^4.66.1"
|
||||||
|
|
||||||
[tool.poetry.group.dev]
|
[tool.poetry.group.dev]
|
||||||
optional = true
|
optional = true
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
from .db import session_fixture, engine_fixture
|
||||||
|
from .client import client_fixture
|
||||||
|
from .config import config_fixture
|
|
@ -0,0 +1,7 @@
|
||||||
|
from ..fixtures.client import client_fixture
|
||||||
|
|
||||||
|
from diyalgo.expansions.timeline import fetch_timeline
|
||||||
|
|
||||||
|
def test_fetch_public_timeline(client_fixture):
    """The first page of the public timeline can be fetched and is a list."""
    fetcher = fetch_timeline(client_fixture, 'public')
    tl = next(fetcher)
    # Without an assertion the test could only fail by raising.
    assert isinstance(tl, list)
|
|
@ -0,0 +1,8 @@
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from ..fixtures import session_fixture, engine_fixture, client_fixture, config_fixture
|
||||||
|
|
||||||
|
from diyalgo.workers.timeline import populate_timeline
|
||||||
|
|
||||||
|
def test_populate_timeline(session_fixture, client_fixture):
    """Smoke test: populating the DB from the timeline runs without raising."""
    populate_timeline(client_fixture, session_fixture)
|
Loading…
Reference in New Issue