From 5ea22edcb81a68c86bd5d67b860d4eb0ad4bf522 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Mon, 5 Aug 2019 22:40:24 +0200 Subject: [PATCH] Cleanup and add a unique request ID --- app.py | 77 +++++++++++++++++---------------------- blueprints/tasks.py | 2 +- core/activitypub.py | 4 +- core/inbox.py | 3 +- utils/blacklist.py | 38 +++++++++++++++++++ utils/media.py | 6 ++- utils/opengraph.py | 5 ++- utils/template_filters.py | 5 --- 8 files changed, 86 insertions(+), 54 deletions(-) create mode 100644 utils/blacklist.py diff --git a/app.py b/app.py index 14604b1..fe705f6 100644 --- a/app.py +++ b/app.py @@ -3,12 +3,13 @@ import logging import os import traceback from datetime import datetime -from urllib.parse import urlparse +from uuid import uuid4 from bson.objectid import ObjectId from flask import Flask from flask import Response from flask import abort +from flask import g from flask import jsonify as flask_jsonify from flask import redirect from flask import render_template @@ -49,8 +50,10 @@ from core.meta import Box from core.meta import MetaKey from core.meta import _meta from core.meta import by_remote_id +from core.meta import by_type from core.meta import in_outbox from core.meta import is_public +from core.meta import not_undo from core.shared import _build_thread from core.shared import _get_ip from core.shared import csrf @@ -59,6 +62,7 @@ from core.shared import jsonify from core.shared import login_required from core.shared import noindex from core.shared import paginated_query +from utils.blacklist import is_blacklisted from utils.key import get_secret_key from utils.template_filters import filters @@ -87,14 +91,6 @@ else: root_logger.setLevel(gunicorn_logger.level) -def is_blacklisted(url: str) -> bool: - try: - return urlparse(url).netloc in config.BLACKLIST - except Exception: - logger.exception(f"failed to blacklist for {url}") - return False - - @app.context_processor def inject_config(): q = { @@ -153,18 +149,24 @@ def inject_config(): ) +@app.before_request +def generate_request_id(): + g.request_id = uuid4().hex + + @app.after_request def set_x_powered_by(response): response.headers["X-Powered-By"] = "microblog.pub" + response.headers["X-Request-ID"] = g.request_id return response @app.errorhandler(ValueError) def handle_value_error(error): logger.error( - f"caught value error: {error!r}, {traceback.format_tb(error.__traceback__)}" + f"caught value error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}" ) - response = flask_jsonify(message=error.args[0]) + response = flask_jsonify(message=error.args[0], request_id=g.request_id) response.status_code = 400 return response @@ -172,9 +174,9 @@ def handle_value_error(error): @app.errorhandler(Error) def handle_activitypub_error(error): logger.error( - f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}" + f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}" ) - response = flask_jsonify(error.to_dict()) + response = flask_jsonify({**error.to_dict(), "request_id": g.request_id}) response.status_code = error.status_code return response @@ -182,9 +184,9 @@ def handle_activitypub_error(error): @app.errorhandler(TaskError) def handle_task_error(error): logger.error( - f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}" + f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}" ) - response = flask_jsonify({"traceback": error.message}) + response = flask_jsonify({"traceback": error.message, "request_id": g.request_id}) response.status_code = 500 return response @@ -274,7 +276,6 @@ def index(): "meta.public": True, "$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}], } - print(list(DB.activities.find(q))) pinned = [] # Only fetch the pinned notes if we're on the first page @@ -417,7 +418,6 @@ def outbox(): abort(401) data = request.get_json(force=True) - print(data) activity = ap.parse_activity(data) activity_id = post_to_outbox(activity) @@ -603,32 +603,21 @@ def inbox(): return Response( status=422, headers={"Content-Type": "application/json"}, - response=json.dumps({"error": "failed to decode request as JSON"}), + response=json.dumps( + { + "error": "failed to decode request body as JSON", + "request_id": g.request_id, + } + ), ) # Check the blacklist now to see if we can return super early - if ( - "id" in data - and is_blacklisted(data["id"]) - or ( - "object" in data - and isinstance(data["object"], dict) - and "id" in data["object"] - and is_blacklisted(data["object"]["id"]) - ) - or ( - "object" in data - and isinstance(data["object"], str) - and is_blacklisted(data["object"]) - ) - ): + if is_blacklisted(data): logger.info(f"dropping activity from blacklisted host: {data['id']}") return Response(status=201) - print(f"req_headers={request.headers}") - print(f"raw_data={data}") - logger.debug(f"req_headers={request.headers}") - logger.debug(f"raw_data={data}") + logger.info(f"request_id={g.request_id} req_headers={request.headers!r}") + logger.info(f"request_id={g.request_id} raw_data={data}") try: if not verify_request( request.method, request.path, request.headers, request.data @@ -636,7 +625,7 @@ def inbox(): raise Exception("failed to verify request") except Exception: logger.exception( - "failed to verify request, trying to verify the payload by fetching the remote" + f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote" ) try: remote_data = get_backend().fetch_iri(data["id"]) @@ -669,7 +658,8 @@ def inbox(): headers={"Content-Type": "application/json"}, response=json.dumps( { - "error": "failed to verify request (using HTTP signatures or fetching the IRI)" + "error": "failed to verify request (using HTTP signatures or fetching the IRI)", + "request_id": g.request_id, } ), ) @@ -688,6 +678,7 @@ def inbox(): "geoip": geoip, "tb": traceback.format_exc(), "headers": dict(request.headers), + "request_id": g.request_id, }, } ) @@ -697,16 +688,16 @@ def inbox(): headers={"Content-Type": "application/json"}, response=json.dumps( { - "error": "failed to verify request (using HTTP signatures or fetching the IRI)" + "error": "failed to verify request (using HTTP signatures or fetching the IRI)", + "request_id": g.request_id, } ), ) # We fetched the remote data successfully data = remote_data - print(data) activity = ap.parse_activity(data) - logger.debug(f"inbox activity={activity}/{data}") + logger.debug(f"inbox activity={g.request_id}/{activity}/{data}") post_to_inbox(activity) return Response(status=201) @@ -741,7 +732,7 @@ def followers(): @app.route("/following") def following(): - q = {"box": Box.OUTBOX.value, "type": ActivityType.FOLLOW.value, "meta.undo": False} + q = {**in_outbox(), **by_type(ActivityType.FOLLOW), **not_undo()} if is_api_request(): return jsonify( diff --git a/blueprints/tasks.py b/blueprints/tasks.py index 1939d3d..f98ea6e 100644 --- a/blueprints/tasks.py +++ b/blueprints/tasks.py @@ -196,7 +196,7 @@ def task_finish_post_to_inbox() -> _Response: except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError): app.logger.exception(f"no retry") except Exception as err: - app.logger.exception(f"failed to cache attachments for {iri}") + app.logger.exception(f"failed to cfinish post to inbox for {iri}") raise TaskError() from err return "" diff --git a/core/activitypub.py b/core/activitypub.py index 53dc539..7ec62a8 100644 --- a/core/activitypub.py +++ b/core/activitypub.py @@ -406,6 +406,8 @@ class MicroblogPubBackend(Backend): new_threads = [] root_reply = in_reply_to reply = ap.fetch_remote_activity(root_reply) + # FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that) + # FIXME(tsileo): can be a 403 too, in this case what to do? not error at least # Ensure the this is a local reply, of a question, with a direct "to" addressing if ( @@ -427,7 +429,6 @@ class MicroblogPubBackend(Backend): ) return None - print(f"processing {create!r} and incrementing {in_reply_to}") creply = DB.activities.find_one_and_update( {"activity.object.id": in_reply_to}, {"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}}, @@ -444,6 +445,7 @@ class MicroblogPubBackend(Backend): break root_reply = in_reply_to reply = ap.fetch_remote_activity(root_reply) + # FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that) q = {"activity.object.id": root_reply} if not DB.activities.count(q): save(Box.REPLIES, reply) diff --git a/core/inbox.py b/core/inbox.py index 2b17a2a..d7e9cba 100644 --- a/core/inbox.py +++ b/core/inbox.py @@ -38,8 +38,9 @@ def process_inbox(activity: ap.BaseActivity, new_meta: _NewMeta) -> None: def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None: _logger.info(f"process_inbox activity={delete!r}") obj_id = delete.get_object_id() - _logger.debug("delete object={obj_id}") + _logger.debug(f"delete object={obj_id}") try: + # FIXME(tsileo): call the DB here instead? like for the oubox obj = ap.fetch_remote_activity(obj_id) _logger.info(f"inbox_delete handle_replies obj={obj!r}") in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None diff --git a/utils/blacklist.py b/utils/blacklist.py new file mode 100644 index 0000000..abd3d7a --- /dev/null +++ b/utils/blacklist.py @@ -0,0 +1,38 @@ +import logging +from typing import Any +from typing import Dict +from urllib.parse import urlparse + +import config + +logger = logging.getLogger(__name__) + + +def is_url_blacklisted(url: str) -> bool: + try: + return urlparse(url).netloc in config.BLACKLIST + except Exception: + logger.exception(f"failed to blacklist for {url}") + return False + + +def is_blacklisted(data: Dict[str, Any]) -> bool: + """Returns True if the activity is coming/or referencing a blacklisted host.""" + if ( + "id" in data + and is_url_blacklisted(data["id"]) + or ( + "object" in data + and isinstance(data["object"], dict) + and "id" in data["object"] + and is_url_blacklisted(data["object"]["id"]) + ) + or ( + "object" in data + and isinstance(data["object"], str) + and is_url_blacklisted(data["object"]) + ) + ): + return True + + return False diff --git a/utils/media.py b/utils/media.py index 3f6c109..d26c822 100644 --- a/utils/media.py +++ b/utils/media.py @@ -1,6 +1,7 @@ import base64 import mimetypes from enum import Enum +from enum import unique from gzip import GzipFile from io import BytesIO from typing import Any @@ -13,7 +14,7 @@ from little_boxes import activitypub as ap from PIL import Image -def load(url, user_agent): +def load(url: str, user_agent: str) -> Image: """Initializes a `PIL.Image` from the URL.""" with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp: resp.raise_for_status() @@ -22,7 +23,7 @@ def load(url, user_agent): return Image.open(BytesIO(resp.raw.read())) -def to_data_uri(img): +def to_data_uri(img: Image) -> str: out = BytesIO() img.save(out, format=img.format) out.seek(0) @@ -30,6 +31,7 @@ def to_data_uri(img): return f"data:{img.get_format_mimetype()};base64,{data}" +@unique class Kind(Enum): ATTACHMENT = "attachment" ACTOR_ICON = "actor_icon" diff --git a/utils/opengraph.py b/utils/opengraph.py index 5379119..8052e39 100644 --- a/utils/opengraph.py +++ b/utils/opengraph.py @@ -1,4 +1,7 @@ import logging +from typing import Any +from typing import Dict +from typing import Set from urllib.parse import urlparse import opengraph @@ -14,7 +17,7 @@ from .lookup import lookup logger = logging.getLogger(__name__) -def links_from_note(note): +def links_from_note(note: Dict[str, Any]) -> Set[str]: note_host = urlparse(ap._get_id(note["id"]) or "").netloc links = set() diff --git a/utils/template_filters.py b/utils/template_filters.py index 1f23b8f..8978862 100644 --- a/utils/template_filters.py +++ b/utils/template_filters.py @@ -170,7 +170,6 @@ def url_or_id(d): @filters.app_template_filter() def get_url(u): - print(f"GET_URL({u!r})") if isinstance(u, list): for l in u: if l.get("mimeType") == "text/html": @@ -191,7 +190,6 @@ def get_actor(url): url = url[0] if isinstance(url, dict): url = url.get("id") - print(f"GET_ACTOR {url}") try: return ap.get_backend().fetch_iri(url) except (ActivityNotFoundError, ActivityGoneError): @@ -203,8 +201,6 @@ def get_actor(url): @filters.app_template_filter() def get_answer_count(choice, obj, meta): count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0) - print(count_from_meta) - print(choice, obj, meta) if count_from_meta: return count_from_meta for option in obj.get("oneOf", obj.get("anyOf", [])): @@ -219,7 +215,6 @@ def get_total_answers_count(obj, meta): return cached cnt = 0 for choice in obj.get("anyOf", obj.get("oneOf", [])): - print(choice) cnt += choice.get("replies", {}).get("totalItems", 0) return cnt