From 25a75a9cef2bed500437b610d812f7148813bafa Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Sun, 27 May 2018 11:01:34 +0200 Subject: [PATCH] Cleanup, improve the collection resolver --- README.md | 8 +++++ activitypub.py | 43 ++----------------------- config.py | 4 +++ tests/federation_test.py | 5 +++ utils/activitypub_utils.py | 65 ++++++++++++++++++++++++++++++++++++++ utils/errors.py | 15 +++++++++ 6 files changed, 99 insertions(+), 41 deletions(-) create mode 100644 utils/activitypub_utils.py create mode 100644 utils/errors.py diff --git a/README.md b/README.md index 445cf6a..5784e93 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,14 @@ - Manually tested against [Mastodon](https://github.com/tootsuite/mastodon) - Project is running an up-to-date instance +## ActivityPub + +microblog.pub implements an [ActivityPub](http://activitypub.rocks/) server: both the client-to-server API and the federated server-to-server API. + +Compatible with [Mastodon](https://github.com/tootsuite/mastodon) (which does not follow the spec closely), but OStatus messages are dropped. + +Activities are verified using HTTP Signatures or by fetching the content from the remote server directly. 
+ ## Running your instance ### Installation diff --git a/activitypub.py b/activitypub.py index 5cf1bee..f1de0a8 100644 --- a/activitypub.py +++ b/activitypub.py @@ -4,13 +4,13 @@ import os from datetime import datetime from enum import Enum -import requests from bson.objectid import ObjectId from html2text import html2text from feedgen.feed import FeedGenerator from utils.linked_data_sig import generate_signature from utils.actor_service import NotAnActorError +from utils import activitypub_utils from config import USERNAME, BASE_URL, ID from config import CTX_AS, CTX_SECURITY, AS_PUBLIC from config import KEY, DB, ME, ACTOR_SERVICE @@ -936,46 +936,7 @@ def parse_collection(payload: Optional[Dict[str, Any]] = None, url: Optional[str return [doc['remote_actor'] for doc in DB.following.find()] # Go through all the pages - out: List[str] = [] - if url: - resp = requests.get(url, headers={'Accept': 'application/activity+json'}) - resp.raise_for_status() - payload = resp.json() - - if not payload: - raise ValueError('must at least prove a payload or an URL') - - if payload['type'] in ['Collection', 'OrderedCollection']: - if 'orderedItems' in payload: - return payload['orderedItems'] - if 'items' in payload: - return payload['items'] - if 'first' in payload: - if 'orderedItems' in payload['first']: - out.extend(payload['first']['orderedItems']) - if 'items' in payload['first']: - out.extend(payload['first']['items']) - n = payload['first'].get('next') - if n: - out.extend(parse_collection(url=n)) - return out - - while payload: - if payload['type'] in ['CollectionPage', 'OrderedCollectionPage']: - if 'orderedItems' in payload: - out.extend(payload['orderedItems']) - if 'items' in payload: - out.extend(payload['items']) - n = payload.get('next') - if n is None: - break - resp = requests.get(n, headers={'Accept': 'application/activity+json'}) - resp.raise_for_status() - payload = resp.json() - else: - raise Exception('unexpected activity type {}'.format(payload['type'])) 
- - return out + return activitypub_utils.parse_collection(payload, url) def build_ordered_collection(col, q=None, cursor=None, map_func=None, limit=50, col_name=None): diff --git a/config.py b/config.py index 6cb17e6..5bbebe5 100644 --- a/config.py +++ b/config.py @@ -4,6 +4,7 @@ import yaml from pymongo import MongoClient import requests +from utils import strtobool from utils.key import Key from utils.actor_service import ActorService from utils.object_service import ObjectService @@ -20,6 +21,9 @@ except ModuleNotFoundError: VERSION = subprocess.check_output(['git', 'describe', '--always']).split()[0].decode('utf-8') +DEBUG_MODE = strtobool(os.getenv('MICROBLOGPUB_DEBUG', 'false')) + + CTX_AS = 'https://www.w3.org/ns/activitystreams' CTX_SECURITY = 'https://w3id.org/security/v1' AS_PUBLIC = 'https://www.w3.org/ns/activitystreams#Public' diff --git a/tests/federation_test.py b/tests/federation_test.py index 20203de..f291638 100644 --- a/tests/federation_test.py +++ b/tests/federation_test.py @@ -53,6 +53,11 @@ class Instance(object): return resp.json()['first']['orderedItems'] + def outbox(self): + resp = self.session.get(f'{self.host_url}/following', headers={'Accept': 'application/activity+json'}) + resp.raise_for_status() + return resp.json() + def test_federation(): """Ensure the homepage is accessible.""" diff --git a/utils/activitypub_utils.py b/utils/activitypub_utils.py new file mode 100644 index 0000000..0275f54 --- /dev/null +++ b/utils/activitypub_utils.py @@ -0,0 +1,65 @@ +from typing import Optional, Dict, List, Any + +import requests + +from .errors import RecursionLimitExceededError +from .errors import UnexpectedActivityTypeError + + +def _do_req(url: str, headers: Dict[str, str]) -> Dict[str, Any]: + resp = requests.get(url, headers=headers) + resp.raise_for_status() + return resp.json() + + +def parse_collection( + payload: Optional[Dict[str, Any]] = None, + url: Optional[str] = None, + user_agent: Optional[str] = None, + level: int = 0, + 
do_req: Any = _do_req, +) -> List[str]: + """Resolve/fetch a `Collection`/`OrderedCollection`.""" + if level > 3: + raise RecursionLimitExceededError('recursion limit exceeded') + + # Go through all the pages + headers = {'Accept': 'application/activity+json'} + if user_agent: + headers['User-Agent'] = user_agent + + out: List[str] = [] + if url: + payload = do_req(url, headers) + if not payload: + raise ValueError('must at least prove a payload or an URL') + + if payload['type'] in ['Collection', 'OrderedCollection']: + if 'orderedItems' in payload: + return payload['orderedItems'] + if 'items' in payload: + return payload['items'] + if 'first' in payload: + if 'orderedItems' in payload['first']: + out.extend(payload['first']['orderedItems']) + if 'items' in payload['first']: + out.extend(payload['first']['items']) + n = payload['first'].get('next') + if n: + out.extend(parse_collection(url=n, user_agent=user_agent, level=level+1, do_req=do_req)) + return out + + while payload: + if payload['type'] in ['CollectionPage', 'OrderedCollectionPage']: + if 'orderedItems' in payload: + out.extend(payload['orderedItems']) + if 'items' in payload: + out.extend(payload['items']) + n = payload.get('next') + if n is None: + break + payload = do_req(n, headers) + else: + raise UnexpectedActivityTypeError('unexpected activity type {}'.format(payload['type'])) + + return out diff --git a/utils/errors.py b/utils/errors.py new file mode 100644 index 0000000..31e678e --- /dev/null +++ b/utils/errors.py @@ -0,0 +1,15 @@ + +class Error(Exception): + pass + + +class BadActivityError(Error): + pass + + +class RecursionLimitExceededError(BadActivityError): + pass + + +class UnexpectedActivityTypeError(BadActivityError): + pass