mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2025-01-22 12:54:29 +00:00
Cleanup and add a unique request ID
This commit is contained in:
parent
16f4af0463
commit
5ea22edcb8
8 changed files with 86 additions and 54 deletions
77
app.py
77
app.py
|
@ -3,12 +3,13 @@ import logging
|
|||
import os
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
from uuid import uuid4
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
from flask import Flask
|
||||
from flask import Response
|
||||
from flask import abort
|
||||
from flask import g
|
||||
from flask import jsonify as flask_jsonify
|
||||
from flask import redirect
|
||||
from flask import render_template
|
||||
|
@ -49,8 +50,10 @@ from core.meta import Box
|
|||
from core.meta import MetaKey
|
||||
from core.meta import _meta
|
||||
from core.meta import by_remote_id
|
||||
from core.meta import by_type
|
||||
from core.meta import in_outbox
|
||||
from core.meta import is_public
|
||||
from core.meta import not_undo
|
||||
from core.shared import _build_thread
|
||||
from core.shared import _get_ip
|
||||
from core.shared import csrf
|
||||
|
@ -59,6 +62,7 @@ from core.shared import jsonify
|
|||
from core.shared import login_required
|
||||
from core.shared import noindex
|
||||
from core.shared import paginated_query
|
||||
from utils.blacklist import is_blacklisted
|
||||
from utils.key import get_secret_key
|
||||
from utils.template_filters import filters
|
||||
|
||||
|
@ -87,14 +91,6 @@ else:
|
|||
root_logger.setLevel(gunicorn_logger.level)
|
||||
|
||||
|
||||
def is_blacklisted(url: str) -> bool:
|
||||
try:
|
||||
return urlparse(url).netloc in config.BLACKLIST
|
||||
except Exception:
|
||||
logger.exception(f"failed to blacklist for {url}")
|
||||
return False
|
||||
|
||||
|
||||
@app.context_processor
|
||||
def inject_config():
|
||||
q = {
|
||||
|
@ -153,18 +149,24 @@ def inject_config():
|
|||
)
|
||||
|
||||
|
||||
@app.before_request
|
||||
def generate_request_id():
|
||||
g.request_id = uuid4().hex
|
||||
|
||||
|
||||
@app.after_request
|
||||
def set_x_powered_by(response):
|
||||
response.headers["X-Powered-By"] = "microblog.pub"
|
||||
response.headers["X-Request-ID"] = g.request_id
|
||||
return response
|
||||
|
||||
|
||||
@app.errorhandler(ValueError)
|
||||
def handle_value_error(error):
|
||||
logger.error(
|
||||
f"caught value error: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||
f"caught value error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||
)
|
||||
response = flask_jsonify(message=error.args[0])
|
||||
response = flask_jsonify(message=error.args[0], request_id=g.request_id)
|
||||
response.status_code = 400
|
||||
return response
|
||||
|
||||
|
@ -172,9 +174,9 @@ def handle_value_error(error):
|
|||
@app.errorhandler(Error)
|
||||
def handle_activitypub_error(error):
|
||||
logger.error(
|
||||
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||
f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||
)
|
||||
response = flask_jsonify(error.to_dict())
|
||||
response = flask_jsonify({**error.to_dict(), "request_id": g.request_id})
|
||||
response.status_code = error.status_code
|
||||
return response
|
||||
|
||||
|
@ -182,9 +184,9 @@ def handle_activitypub_error(error):
|
|||
@app.errorhandler(TaskError)
|
||||
def handle_task_error(error):
|
||||
logger.error(
|
||||
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||
f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||
)
|
||||
response = flask_jsonify({"traceback": error.message})
|
||||
response = flask_jsonify({"traceback": error.message, "request_id": g.request_id})
|
||||
response.status_code = 500
|
||||
return response
|
||||
|
||||
|
@ -274,7 +276,6 @@ def index():
|
|||
"meta.public": True,
|
||||
"$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}],
|
||||
}
|
||||
print(list(DB.activities.find(q)))
|
||||
|
||||
pinned = []
|
||||
# Only fetch the pinned notes if we're on the first page
|
||||
|
@ -417,7 +418,6 @@ def outbox():
|
|||
abort(401)
|
||||
|
||||
data = request.get_json(force=True)
|
||||
print(data)
|
||||
activity = ap.parse_activity(data)
|
||||
activity_id = post_to_outbox(activity)
|
||||
|
||||
|
@ -603,32 +603,21 @@ def inbox():
|
|||
return Response(
|
||||
status=422,
|
||||
headers={"Content-Type": "application/json"},
|
||||
response=json.dumps({"error": "failed to decode request as JSON"}),
|
||||
response=json.dumps(
|
||||
{
|
||||
"error": "failed to decode request body as JSON",
|
||||
"request_id": g.request_id,
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
# Check the blacklist now to see if we can return super early
|
||||
if (
|
||||
"id" in data
|
||||
and is_blacklisted(data["id"])
|
||||
or (
|
||||
"object" in data
|
||||
and isinstance(data["object"], dict)
|
||||
and "id" in data["object"]
|
||||
and is_blacklisted(data["object"]["id"])
|
||||
)
|
||||
or (
|
||||
"object" in data
|
||||
and isinstance(data["object"], str)
|
||||
and is_blacklisted(data["object"])
|
||||
)
|
||||
):
|
||||
if is_blacklisted(data):
|
||||
logger.info(f"dropping activity from blacklisted host: {data['id']}")
|
||||
return Response(status=201)
|
||||
|
||||
print(f"req_headers={request.headers}")
|
||||
print(f"raw_data={data}")
|
||||
logger.debug(f"req_headers={request.headers}")
|
||||
logger.debug(f"raw_data={data}")
|
||||
logger.info(f"request_id={g.request_id} req_headers={request.headers!r}")
|
||||
logger.info(f"request_id={g.request_id} raw_data={data}")
|
||||
try:
|
||||
if not verify_request(
|
||||
request.method, request.path, request.headers, request.data
|
||||
|
@ -636,7 +625,7 @@ def inbox():
|
|||
raise Exception("failed to verify request")
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"failed to verify request, trying to verify the payload by fetching the remote"
|
||||
f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote"
|
||||
)
|
||||
try:
|
||||
remote_data = get_backend().fetch_iri(data["id"])
|
||||
|
@ -669,7 +658,8 @@ def inbox():
|
|||
headers={"Content-Type": "application/json"},
|
||||
response=json.dumps(
|
||||
{
|
||||
"error": "failed to verify request (using HTTP signatures or fetching the IRI)"
|
||||
"error": "failed to verify request (using HTTP signatures or fetching the IRI)",
|
||||
"request_id": g.request_id,
|
||||
}
|
||||
),
|
||||
)
|
||||
|
@ -688,6 +678,7 @@ def inbox():
|
|||
"geoip": geoip,
|
||||
"tb": traceback.format_exc(),
|
||||
"headers": dict(request.headers),
|
||||
"request_id": g.request_id,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
@ -697,16 +688,16 @@ def inbox():
|
|||
headers={"Content-Type": "application/json"},
|
||||
response=json.dumps(
|
||||
{
|
||||
"error": "failed to verify request (using HTTP signatures or fetching the IRI)"
|
||||
"error": "failed to verify request (using HTTP signatures or fetching the IRI)",
|
||||
"request_id": g.request_id,
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
# We fetched the remote data successfully
|
||||
data = remote_data
|
||||
print(data)
|
||||
activity = ap.parse_activity(data)
|
||||
logger.debug(f"inbox activity={activity}/{data}")
|
||||
logger.debug(f"inbox activity={g.request_id}/{activity}/{data}")
|
||||
post_to_inbox(activity)
|
||||
|
||||
return Response(status=201)
|
||||
|
@ -741,7 +732,7 @@ def followers():
|
|||
|
||||
@app.route("/following")
|
||||
def following():
|
||||
q = {"box": Box.OUTBOX.value, "type": ActivityType.FOLLOW.value, "meta.undo": False}
|
||||
q = {**in_outbox(), **by_type(ActivityType.FOLLOW), **not_undo()}
|
||||
|
||||
if is_api_request():
|
||||
return jsonify(
|
||||
|
|
|
@ -196,7 +196,7 @@ def task_finish_post_to_inbox() -> _Response:
|
|||
except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
|
||||
app.logger.exception(f"no retry")
|
||||
except Exception as err:
|
||||
app.logger.exception(f"failed to cache attachments for {iri}")
|
||||
app.logger.exception(f"failed to cfinish post to inbox for {iri}")
|
||||
raise TaskError() from err
|
||||
|
||||
return ""
|
||||
|
|
|
@ -406,6 +406,8 @@ class MicroblogPubBackend(Backend):
|
|||
new_threads = []
|
||||
root_reply = in_reply_to
|
||||
reply = ap.fetch_remote_activity(root_reply)
|
||||
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
|
||||
# FIXME(tsileo): can be a 403 too, in this case what to do? not error at least
|
||||
|
||||
# Ensure the this is a local reply, of a question, with a direct "to" addressing
|
||||
if (
|
||||
|
@ -427,7 +429,6 @@ class MicroblogPubBackend(Backend):
|
|||
)
|
||||
return None
|
||||
|
||||
print(f"processing {create!r} and incrementing {in_reply_to}")
|
||||
creply = DB.activities.find_one_and_update(
|
||||
{"activity.object.id": in_reply_to},
|
||||
{"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}},
|
||||
|
@ -444,6 +445,7 @@ class MicroblogPubBackend(Backend):
|
|||
break
|
||||
root_reply = in_reply_to
|
||||
reply = ap.fetch_remote_activity(root_reply)
|
||||
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
|
||||
q = {"activity.object.id": root_reply}
|
||||
if not DB.activities.count(q):
|
||||
save(Box.REPLIES, reply)
|
||||
|
|
|
@ -38,8 +38,9 @@ def process_inbox(activity: ap.BaseActivity, new_meta: _NewMeta) -> None:
|
|||
def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None:
|
||||
_logger.info(f"process_inbox activity={delete!r}")
|
||||
obj_id = delete.get_object_id()
|
||||
_logger.debug("delete object={obj_id}")
|
||||
_logger.debug(f"delete object={obj_id}")
|
||||
try:
|
||||
# FIXME(tsileo): call the DB here instead? like for the oubox
|
||||
obj = ap.fetch_remote_activity(obj_id)
|
||||
_logger.info(f"inbox_delete handle_replies obj={obj!r}")
|
||||
in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None
|
||||
|
|
38
utils/blacklist.py
Normal file
38
utils/blacklist.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
import logging
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_url_blacklisted(url: str) -> bool:
|
||||
try:
|
||||
return urlparse(url).netloc in config.BLACKLIST
|
||||
except Exception:
|
||||
logger.exception(f"failed to blacklist for {url}")
|
||||
return False
|
||||
|
||||
|
||||
def is_blacklisted(data: Dict[str, Any]) -> bool:
|
||||
"""Returns True if the activity is coming/or referencing a blacklisted host."""
|
||||
if (
|
||||
"id" in data
|
||||
and is_url_blacklisted(data["id"])
|
||||
or (
|
||||
"object" in data
|
||||
and isinstance(data["object"], dict)
|
||||
and "id" in data["object"]
|
||||
and is_url_blacklisted(data["object"]["id"])
|
||||
)
|
||||
or (
|
||||
"object" in data
|
||||
and isinstance(data["object"], str)
|
||||
and is_url_blacklisted(data["object"])
|
||||
)
|
||||
):
|
||||
return True
|
||||
|
||||
return False
|
|
@ -1,6 +1,7 @@
|
|||
import base64
|
||||
import mimetypes
|
||||
from enum import Enum
|
||||
from enum import unique
|
||||
from gzip import GzipFile
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
|
@ -13,7 +14,7 @@ from little_boxes import activitypub as ap
|
|||
from PIL import Image
|
||||
|
||||
|
||||
def load(url, user_agent):
|
||||
def load(url: str, user_agent: str) -> Image:
|
||||
"""Initializes a `PIL.Image` from the URL."""
|
||||
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
|
||||
resp.raise_for_status()
|
||||
|
@ -22,7 +23,7 @@ def load(url, user_agent):
|
|||
return Image.open(BytesIO(resp.raw.read()))
|
||||
|
||||
|
||||
def to_data_uri(img):
|
||||
def to_data_uri(img: Image) -> str:
|
||||
out = BytesIO()
|
||||
img.save(out, format=img.format)
|
||||
out.seek(0)
|
||||
|
@ -30,6 +31,7 @@ def to_data_uri(img):
|
|||
return f"data:{img.get_format_mimetype()};base64,{data}"
|
||||
|
||||
|
||||
@unique
|
||||
class Kind(Enum):
|
||||
ATTACHMENT = "attachment"
|
||||
ACTOR_ICON = "actor_icon"
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
import logging
|
||||
from typing import Any
|
||||
from typing import Dict
|
||||
from typing import Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import opengraph
|
||||
|
@ -14,7 +17,7 @@ from .lookup import lookup
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def links_from_note(note):
|
||||
def links_from_note(note: Dict[str, Any]) -> Set[str]:
|
||||
note_host = urlparse(ap._get_id(note["id"]) or "").netloc
|
||||
|
||||
links = set()
|
||||
|
|
|
@ -170,7 +170,6 @@ def url_or_id(d):
|
|||
|
||||
@filters.app_template_filter()
|
||||
def get_url(u):
|
||||
print(f"GET_URL({u!r})")
|
||||
if isinstance(u, list):
|
||||
for l in u:
|
||||
if l.get("mimeType") == "text/html":
|
||||
|
@ -191,7 +190,6 @@ def get_actor(url):
|
|||
url = url[0]
|
||||
if isinstance(url, dict):
|
||||
url = url.get("id")
|
||||
print(f"GET_ACTOR {url}")
|
||||
try:
|
||||
return ap.get_backend().fetch_iri(url)
|
||||
except (ActivityNotFoundError, ActivityGoneError):
|
||||
|
@ -203,8 +201,6 @@ def get_actor(url):
|
|||
@filters.app_template_filter()
|
||||
def get_answer_count(choice, obj, meta):
|
||||
count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0)
|
||||
print(count_from_meta)
|
||||
print(choice, obj, meta)
|
||||
if count_from_meta:
|
||||
return count_from_meta
|
||||
for option in obj.get("oneOf", obj.get("anyOf", [])):
|
||||
|
@ -219,7 +215,6 @@ def get_total_answers_count(obj, meta):
|
|||
return cached
|
||||
cnt = 0
|
||||
for choice in obj.get("anyOf", obj.get("oneOf", [])):
|
||||
print(choice)
|
||||
cnt += choice.get("replies", {}).get("totalItems", 0)
|
||||
return cnt
|
||||
|
||||
|
|
Loading…
Reference in a new issue