microblog.pub/app/activitypub.py

409 lines
12 KiB
Python
Raw Normal View History

2022-06-22 18:11:22 +00:00
import enum
import json
import mimetypes
from typing import TYPE_CHECKING
2022-06-22 18:11:22 +00:00
from typing import Any
import httpx
2022-08-20 07:11:48 +00:00
from loguru import logger
2022-06-22 18:11:22 +00:00
from app import config
from app.config import ALSO_KNOWN_AS
2022-06-22 20:17:07 +00:00
from app.config import AP_CONTENT_TYPE # noqa: F401
2022-09-08 18:00:02 +00:00
from app.config import MOVED_TO
2022-06-22 18:11:22 +00:00
from app.httpsig import auth
from app.key import get_pubkey_as_pem
2022-10-05 18:05:16 +00:00
from app.source import dedup_tags
2022-08-24 18:12:10 +00:00
from app.source import hashtagify
2022-07-15 18:50:27 +00:00
from app.utils.url import check_url
2022-06-22 18:11:22 +00:00
if TYPE_CHECKING:
from app.actor import Actor
2022-06-22 18:11:22 +00:00
RawObject = dict[str, Any]
AS_CTX = "https://www.w3.org/ns/activitystreams"
AS_PUBLIC = "https://www.w3.org/ns/activitystreams#Public"
ACTOR_TYPES = ["Application", "Group", "Organization", "Person", "Service"]
2022-06-28 07:58:33 +00:00
AS_EXTENDED_CTX = [
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1",
{
# AS ext
"Hashtag": "as:Hashtag",
"sensitive": "as:sensitive",
"manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
"alsoKnownAs": {"@id": "as:alsoKnownAs", "@type": "@id"},
2022-09-07 19:29:09 +00:00
"movedTo": {"@id": "as:movedTo", "@type": "@id"},
2022-06-28 07:58:33 +00:00
# toot
"toot": "http://joinmastodon.org/ns#",
"featured": {"@id": "toot:featured", "@type": "@id"},
"Emoji": "toot:Emoji",
"blurhash": "toot:blurhash",
2022-07-23 17:02:06 +00:00
"votersCount": "toot:votersCount",
2022-06-28 07:58:33 +00:00
# schema
"schema": "http://schema.org#",
"PropertyValue": "schema:PropertyValue",
"value": "schema:value",
# ostatus
"ostatus": "http://ostatus.org#",
"conversation": "ostatus:conversation",
},
]
2022-06-22 18:11:22 +00:00
2022-09-26 19:41:34 +00:00
class FetchError(Exception):
def __init__(self, url: str, resp: httpx.Response | None = None) -> None:
resp_part = ""
if resp:
resp_part = f", got HTTP {resp.status_code}: {resp.text}"
message = f"Failed to fetch {url}{resp_part}"
super().__init__(message)
self.resp = resp
self.url = url
class ObjectIsGoneError(FetchError):
2022-06-24 20:41:43 +00:00
pass
2022-09-26 19:41:34 +00:00
class ObjectNotFoundError(FetchError):
2022-07-13 18:05:15 +00:00
pass
2022-09-26 19:41:34 +00:00
class ObjectUnavailableError(FetchError):
2022-09-12 06:04:16 +00:00
pass
2022-07-26 16:51:20 +00:00
class FetchErrorTypeEnum(str, enum.Enum):
TIMEOUT = "TIMEOUT"
NOT_FOUND = "NOT_FOUND"
2022-08-15 08:15:00 +00:00
UNAUHTORIZED = "UNAUTHORIZED"
2022-07-26 16:51:20 +00:00
INTERNAL_ERROR = "INTERNAL_ERROR"
2022-06-22 18:11:22 +00:00
class VisibilityEnum(str, enum.Enum):
PUBLIC = "public"
UNLISTED = "unlisted"
FOLLOWERS_ONLY = "followers-only"
2022-06-22 18:11:22 +00:00
DIRECT = "direct"
@staticmethod
def get_display_name(key: "VisibilityEnum") -> str:
return {
VisibilityEnum.PUBLIC: "Public - sent to followers and visible on the homepage", # noqa: E501
VisibilityEnum.UNLISTED: "Unlisted - like public, but hidden from the homepage", # noqa: E501,
VisibilityEnum.FOLLOWERS_ONLY: "Followers only",
VisibilityEnum.DIRECT: "Direct - only visible for mentioned actors",
}[key]
2022-06-22 18:11:22 +00:00
2022-08-24 18:12:10 +00:00
_LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
2022-10-05 18:05:16 +00:00
_LOCAL_ACTOR_METADATA = []
if config.CONFIG.metadata:
for kv in config.CONFIG.metadata:
kv_value, kv_tags = hashtagify(kv.value)
_LOCAL_ACTOR_METADATA.append(
{
"name": kv.key,
"type": "PropertyValue",
"value": kv_value,
}
)
_LOCAL_ACTOR_TAGS.extend(kv_tags)
2022-08-24 18:12:10 +00:00
2022-06-22 18:11:22 +00:00
ME = {
2022-06-28 07:58:33 +00:00
"@context": AS_EXTENDED_CTX,
2022-06-22 18:11:22 +00:00
"type": "Person",
"id": config.ID,
"following": config.BASE_URL + "/following",
"followers": config.BASE_URL + "/followers",
"featured": config.BASE_URL + "/featured",
2022-06-22 18:11:22 +00:00
"inbox": config.BASE_URL + "/inbox",
"outbox": config.BASE_URL + "/outbox",
"preferredUsername": config.USERNAME,
"name": config.CONFIG.name,
2022-10-05 18:05:16 +00:00
"summary": _LOCAL_ACTOR_SUMMARY,
2022-07-20 17:57:03 +00:00
"endpoints": {
# For compat with servers expecting a sharedInbox...
"sharedInbox": config.BASE_URL
+ "/inbox",
},
2022-08-09 20:23:14 +00:00
"url": config.ID + "/", # XXX: the path is important for Mastodon compat
"manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
2022-10-05 18:05:16 +00:00
"attachment": _LOCAL_ACTOR_METADATA,
2022-06-22 18:11:22 +00:00
"publicKey": {
"id": f"{config.ID}#main-key",
"owner": config.ID,
2022-07-04 18:25:27 +00:00
"publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
2022-06-22 18:11:22 +00:00
},
2022-10-05 18:05:16 +00:00
"tag": dedup_tags(_LOCAL_ACTOR_TAGS),
2022-06-22 18:11:22 +00:00
}
if config.CONFIG.icon_url:
ME["icon"] = {
"mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
"type": "Image",
"url": config.CONFIG.icon_url,
}
if ALSO_KNOWN_AS:
ME["alsoKnownAs"] = [ALSO_KNOWN_AS]
2022-09-08 18:00:02 +00:00
if MOVED_TO:
ME["movedTo"] = MOVED_TO
if config.CONFIG.image_url:
ME["image"] = {
"mediaType": mimetypes.guess_type(config.CONFIG.image_url)[0],
"type": "Image",
"url": config.CONFIG.image_url,
}
2022-06-22 18:11:22 +00:00
2022-06-22 18:11:22 +00:00
class NotAnObjectError(Exception):
def __init__(self, url: str, resp: httpx.Response | None = None) -> None:
message = f"{url} is not an AP activity"
super().__init__(message)
self.url = url
self.resp = resp
2022-07-14 17:43:02 +00:00
async def fetch(
url: str,
params: dict[str, Any] | None = None,
disable_httpsig: bool = False,
) -> RawObject:
2022-08-20 07:11:48 +00:00
logger.info(f"Fetching {url} ({params=})")
2022-07-15 18:50:27 +00:00
check_url(url)
2022-07-14 17:05:45 +00:00
async with httpx.AsyncClient() as client:
2022-06-29 22:28:07 +00:00
resp = await client.get(
url,
headers={
"User-Agent": config.USER_AGENT,
"Accept": config.AP_CONTENT_TYPE,
},
params=params,
follow_redirects=True,
2022-07-14 17:43:02 +00:00
auth=None if disable_httpsig else auth,
2022-06-29 22:28:07 +00:00
)
2022-06-24 20:41:43 +00:00
# Special handling for deleted object
if resp.status_code == 410:
2022-09-26 19:41:34 +00:00
raise ObjectIsGoneError(url, resp)
2022-09-12 06:04:16 +00:00
elif resp.status_code in [401, 403]:
2022-09-26 19:41:34 +00:00
raise ObjectUnavailableError(url, resp)
2022-07-13 18:05:15 +00:00
elif resp.status_code == 404:
2022-09-26 19:41:34 +00:00
raise ObjectNotFoundError(url, resp)
try:
resp.raise_for_status()
except httpx.HTTPError as http_error:
raise FetchError(url, resp) from http_error
2022-06-24 20:41:43 +00:00
2022-06-22 18:11:22 +00:00
try:
return resp.json()
except json.JSONDecodeError:
raise NotAnObjectError(url, resp)
2022-06-29 22:28:07 +00:00
async def parse_collection( # noqa: C901
2022-06-22 18:11:22 +00:00
url: str | None = None,
payload: RawObject | None = None,
level: int = 0,
limit: int = 0,
2022-06-22 18:11:22 +00:00
) -> list[RawObject]:
"""Resolve/fetch a `Collection`/`OrderedCollection`."""
if level > 3:
raise ValueError("recursion limit exceeded")
# Go through all the pages
out: list[RawObject] = []
if url:
2022-06-29 22:28:07 +00:00
payload = await fetch(url)
2022-06-22 18:11:22 +00:00
if not payload:
raise ValueError("must at least prove a payload or an URL")
ap_type = payload.get("type")
if not ap_type:
raise ValueError(f"Missing type: {payload=}")
if level == 0 and ap_type not in ["Collection", "OrderedCollection"]:
raise ValueError(f"Unexpected type {ap_type}")
if payload["type"] in ["Collection", "OrderedCollection"]:
if "orderedItems" in payload:
return payload["orderedItems"]
if "items" in payload:
return payload["items"]
if "first" in payload:
if isinstance(payload["first"], str):
2022-06-29 22:28:07 +00:00
out.extend(
await parse_collection(
url=payload["first"], level=level + 1, limit=limit
)
2022-06-29 22:28:07 +00:00
)
2022-06-22 18:11:22 +00:00
else:
if "orderedItems" in payload["first"]:
out.extend(payload["first"]["orderedItems"])
if "items" in payload["first"]:
out.extend(payload["first"]["items"])
n = payload["first"].get("next")
if n:
out.extend(
await parse_collection(url=n, level=level + 1, limit=limit)
)
2022-06-22 18:11:22 +00:00
return out
while payload:
if ap_type in ["CollectionPage", "OrderedCollectionPage"]:
if "orderedItems" in payload:
out.extend(payload["orderedItems"])
if "items" in payload:
out.extend(payload["items"])
n = payload.get("next")
if n is None or (limit > 0 and len(out) >= limit):
2022-06-22 18:11:22 +00:00
break
2022-06-29 22:28:07 +00:00
payload = await fetch(n)
2022-06-22 18:11:22 +00:00
else:
raise ValueError("unexpected activity type {}".format(payload["type"]))
return out
def as_list(val: Any | list[Any]) -> list[Any]:
if isinstance(val, list):
return val
return [val]
def get_id(val: str | dict[str, Any]) -> str:
if isinstance(val, dict):
val = val["id"]
if not isinstance(val, str):
raise ValueError(f"Invalid ID type: {val}")
return val
def object_visibility(ap_activity: RawObject, actor: "Actor") -> VisibilityEnum:
2022-06-22 18:11:22 +00:00
to = as_list(ap_activity.get("to", []))
cc = as_list(ap_activity.get("cc", []))
if AS_PUBLIC in to:
return VisibilityEnum.PUBLIC
elif AS_PUBLIC in cc:
return VisibilityEnum.UNLISTED
elif actor.followers_collection_id and actor.followers_collection_id in to + cc:
return VisibilityEnum.FOLLOWERS_ONLY
2022-06-22 18:11:22 +00:00
else:
return VisibilityEnum.DIRECT
def get_actor_id(activity: RawObject) -> str:
if "attributedTo" in activity:
2022-06-22 18:11:22 +00:00
attributed_to = as_list(activity["attributedTo"])
return get_id(attributed_to[0])
else:
return get_id(activity["actor"])
2022-07-05 19:09:49 +00:00
async def get_object(activity: RawObject) -> RawObject:
if "object" not in activity:
raise ValueError(f"No object in {activity}")
raw_activity_object = activity["object"]
if isinstance(raw_activity_object, dict):
return raw_activity_object
elif isinstance(raw_activity_object, str):
return await fetch(raw_activity_object)
else:
raise ValueError(f"Unexpected object {raw_activity_object}")
2022-08-16 20:15:05 +00:00
def get_object_id(activity: RawObject) -> str:
if "object" not in activity:
raise ValueError(f"No object in {activity}")
return get_id(activity["object"])
2022-06-22 18:11:22 +00:00
def wrap_object(activity: RawObject) -> RawObject:
2022-07-26 19:10:59 +00:00
# TODO(tsileo): improve Create VS Update with a `update=True` flag
if "updated" in activity:
return {
"@context": AS_EXTENDED_CTX,
"actor": config.ID,
"to": activity.get("to", []),
"cc": activity.get("cc", []),
"id": activity["id"] + "/update_activity/" + activity["updated"],
"object": remove_context(activity),
"published": activity["published"],
"updated": activity["updated"],
"type": "Update",
}
else:
return {
"@context": AS_EXTENDED_CTX,
"actor": config.ID,
"to": activity.get("to", []),
"cc": activity.get("cc", []),
"id": activity["id"] + "/activity",
"object": remove_context(activity),
"published": activity["published"],
"type": "Create",
}
2022-06-22 18:11:22 +00:00
def wrap_object_if_needed(raw_object: RawObject) -> RawObject:
2022-07-23 17:02:06 +00:00
if raw_object["type"] in ["Note", "Article", "Question"]:
2022-06-22 18:11:22 +00:00
return wrap_object(raw_object)
return raw_object
def unwrap_activity(activity: RawObject) -> RawObject:
2022-07-05 19:09:49 +00:00
# FIXME(ts): deprecate this
if activity["type"] in ["Create", "Update"]:
2022-06-22 18:11:22 +00:00
unwrapped_object = activity["object"]
# Sanity check, ensure the wrapped object actor matches the activity
if get_actor_id(unwrapped_object) != get_actor_id(activity):
raise ValueError(
f"Unwrapped object actor does not match activity: {activity}"
)
return unwrapped_object
return activity
def remove_context(raw_object: RawObject) -> RawObject:
if "@context" not in raw_object:
return raw_object
a = dict(raw_object)
del a["@context"]
return a
async def post(url: str, payload: dict[str, Any]) -> httpx.Response:
2022-08-20 07:11:48 +00:00
logger.info(f"Posting {url} ({payload=})")
2022-07-15 18:50:27 +00:00
check_url(url)
async with httpx.AsyncClient() as client:
resp = await client.post(
url,
headers={
"User-Agent": config.USER_AGENT,
"Content-Type": config.AP_CONTENT_TYPE,
},
json=payload,
auth=auth,
)
2022-06-22 18:11:22 +00:00
resp.raise_for_status()
return resp