forked from forks/microblog.pub
OG metadata fixes/tweaks
This commit is contained in:
parent
b59223a2f7
commit
70cdde5d46
2 changed files with 38 additions and 20 deletions
28
app/boxes.py
28
app/boxes.py
|
@ -59,21 +59,21 @@ async def save_outbox_object(
|
||||||
source: str | None = None,
|
source: str | None = None,
|
||||||
is_transient: bool = False,
|
is_transient: bool = False,
|
||||||
) -> models.OutboxObject:
|
) -> models.OutboxObject:
|
||||||
ra = await RemoteObject.from_raw_object(raw_object)
|
ro = await RemoteObject.from_raw_object(raw_object)
|
||||||
|
|
||||||
outbox_object = models.OutboxObject(
|
outbox_object = models.OutboxObject(
|
||||||
public_id=public_id,
|
public_id=public_id,
|
||||||
ap_type=ra.ap_type,
|
ap_type=ro.ap_type,
|
||||||
ap_id=ra.ap_id,
|
ap_id=ro.ap_id,
|
||||||
ap_context=ra.ap_context,
|
ap_context=ro.ap_context,
|
||||||
ap_object=ra.ap_object,
|
ap_object=ro.ap_object,
|
||||||
visibility=ra.visibility,
|
visibility=ro.visibility,
|
||||||
og_meta=await opengraph.og_meta_from_note(ra.ap_object),
|
og_meta=await opengraph.og_meta_from_note(db_session, ro),
|
||||||
relates_to_inbox_object_id=relates_to_inbox_object_id,
|
relates_to_inbox_object_id=relates_to_inbox_object_id,
|
||||||
relates_to_outbox_object_id=relates_to_outbox_object_id,
|
relates_to_outbox_object_id=relates_to_outbox_object_id,
|
||||||
relates_to_actor_id=relates_to_actor_id,
|
relates_to_actor_id=relates_to_actor_id,
|
||||||
activity_object_ap_id=ra.activity_object_ap_id,
|
activity_object_ap_id=ro.activity_object_ap_id,
|
||||||
is_hidden_from_homepage=True if ra.in_reply_to else False,
|
is_hidden_from_homepage=True if ro.in_reply_to else False,
|
||||||
source=source,
|
source=source,
|
||||||
is_transient=is_transient,
|
is_transient=is_transient,
|
||||||
)
|
)
|
||||||
|
@ -429,7 +429,7 @@ async def send_create(
|
||||||
|
|
||||||
# If the note is public, check if we need to send any webmentions
|
# If the note is public, check if we need to send any webmentions
|
||||||
if visibility == ap.VisibilityEnum.PUBLIC:
|
if visibility == ap.VisibilityEnum.PUBLIC:
|
||||||
possible_targets = opengraph._urls_from_note(obj)
|
possible_targets = await opengraph.external_urls(db_session, outbox_object)
|
||||||
logger.info(f"webmentions possible targert {possible_targets}")
|
logger.info(f"webmentions possible targert {possible_targets}")
|
||||||
for target in possible_targets:
|
for target in possible_targets:
|
||||||
webmention_endpoint = await webmentions.discover_webmention_endpoint(target)
|
webmention_endpoint = await webmentions.discover_webmention_endpoint(target)
|
||||||
|
@ -552,7 +552,8 @@ async def send_update(
|
||||||
|
|
||||||
# If the note is public, check if we need to send any webmentions
|
# If the note is public, check if we need to send any webmentions
|
||||||
if outbox_object.visibility == ap.VisibilityEnum.PUBLIC:
|
if outbox_object.visibility == ap.VisibilityEnum.PUBLIC:
|
||||||
possible_targets = opengraph._urls_from_note(note)
|
|
||||||
|
possible_targets = await opengraph.external_urls(db_session, outbox_object)
|
||||||
logger.info(f"webmentions possible targert {possible_targets}")
|
logger.info(f"webmentions possible targert {possible_targets}")
|
||||||
for target in possible_targets:
|
for target in possible_targets:
|
||||||
webmention_endpoint = await webmentions.discover_webmention_endpoint(target)
|
webmention_endpoint = await webmentions.discover_webmention_endpoint(target)
|
||||||
|
@ -1209,7 +1210,7 @@ async def _process_note_object(
|
||||||
relates_to_inbox_object_id=parent_activity.id,
|
relates_to_inbox_object_id=parent_activity.id,
|
||||||
relates_to_outbox_object_id=None,
|
relates_to_outbox_object_id=None,
|
||||||
activity_object_ap_id=ro.activity_object_ap_id,
|
activity_object_ap_id=ro.activity_object_ap_id,
|
||||||
og_meta=await opengraph.og_meta_from_note(ro.ap_object),
|
og_meta=await opengraph.og_meta_from_note(db_session, ro),
|
||||||
# Hide replies from the stream
|
# Hide replies from the stream
|
||||||
is_hidden_from_stream=not (
|
is_hidden_from_stream=not (
|
||||||
(not is_reply and is_from_following) or is_mention or is_local_reply
|
(not is_reply and is_from_following) or is_mention or is_local_reply
|
||||||
|
@ -1614,6 +1615,9 @@ async def save_to_inbox(
|
||||||
ap_published_at=announced_object.ap_published_at,
|
ap_published_at=announced_object.ap_published_at,
|
||||||
ap_object=announced_object.ap_object,
|
ap_object=announced_object.ap_object,
|
||||||
visibility=announced_object.visibility,
|
visibility=announced_object.visibility,
|
||||||
|
og_meta=await opengraph.og_meta_from_note(
|
||||||
|
db_session, announced_object
|
||||||
|
),
|
||||||
is_hidden_from_stream=True,
|
is_hidden_from_stream=True,
|
||||||
)
|
)
|
||||||
db_session.add(announced_inbox_object)
|
db_session.add(announced_inbox_object)
|
||||||
|
|
|
@ -7,8 +7,13 @@ import httpx
|
||||||
from bs4 import BeautifulSoup # type: ignore
|
from bs4 import BeautifulSoup # type: ignore
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from app import activitypub as ap
|
from app import ap_object
|
||||||
from app import config
|
from app import config
|
||||||
|
from app.actor import LOCAL_ACTOR
|
||||||
|
from app.actor import fetch_actor
|
||||||
|
from app.database import AsyncSession
|
||||||
|
from app.models import InboxObject
|
||||||
|
from app.models import OutboxObject
|
||||||
from app.utils.url import is_url_valid
|
from app.utils.url import is_url_valid
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,17 +49,23 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
||||||
return OpenGraphMeta.parse_obj(raw)
|
return OpenGraphMeta.parse_obj(raw)
|
||||||
|
|
||||||
|
|
||||||
def _urls_from_note(note: ap.RawObject) -> set[str]:
|
async def external_urls(
|
||||||
note_host = urlparse(ap.get_id(note["id"]) or "").netloc
|
db_session: AsyncSession,
|
||||||
|
ro: ap_object.RemoteObject | OutboxObject | InboxObject,
|
||||||
|
) -> set[str]:
|
||||||
|
note_host = urlparse(ro.ap_id).netloc
|
||||||
|
|
||||||
tags_hrefs = set()
|
tags_hrefs = set()
|
||||||
for tag in note.get("tag", []):
|
for tag in ro.tags:
|
||||||
if tag_href := tag.get("href"):
|
if tag_href := tag.get("href"):
|
||||||
tags_hrefs.add(tag_href)
|
tags_hrefs.add(tag_href)
|
||||||
|
if tag.get("type") == "Mention" and tag["name"] != LOCAL_ACTOR.handle:
|
||||||
|
mentioned_actor = await fetch_actor(db_session, tag["href"])
|
||||||
|
tags_hrefs.add(mentioned_actor.url)
|
||||||
|
|
||||||
urls = set()
|
urls = set()
|
||||||
if "content" in note:
|
if ro.content:
|
||||||
soup = BeautifulSoup(note["content"], "html5lib")
|
soup = BeautifulSoup(ro.content, "html5lib")
|
||||||
for link in soup.find_all("a"):
|
for link in soup.find_all("a"):
|
||||||
h = link.get("href")
|
h = link.get("href")
|
||||||
ph = urlparse(h)
|
ph = urlparse(h)
|
||||||
|
@ -91,9 +102,12 @@ async def _og_meta_from_url(url: str) -> OpenGraphMeta | None:
|
||||||
return _scrap_og_meta(url, resp.text)
|
return _scrap_og_meta(url, resp.text)
|
||||||
|
|
||||||
|
|
||||||
async def og_meta_from_note(note: ap.RawObject) -> list[dict[str, Any]]:
|
async def og_meta_from_note(
|
||||||
|
db_session: AsyncSession,
|
||||||
|
ro: ap_object.RemoteObject,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
og_meta = []
|
og_meta = []
|
||||||
urls = _urls_from_note(note)
|
urls = await external_urls(db_session, ro)
|
||||||
for url in urls:
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
maybe_og_meta = await _og_meta_from_url(url)
|
maybe_og_meta = await _og_meta_from_url(url)
|
||||||
|
|
Loading…
Reference in a new issue