forked from forks/microblog.pub
Cleanup and add a unique request ID
This commit is contained in:
parent
16f4af0463
commit
5ea22edcb8
8 changed files with 86 additions and 54 deletions
77
app.py
77
app.py
|
@ -3,12 +3,13 @@ import logging
|
||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from urllib.parse import urlparse
|
from uuid import uuid4
|
||||||
|
|
||||||
from bson.objectid import ObjectId
|
from bson.objectid import ObjectId
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
from flask import Response
|
from flask import Response
|
||||||
from flask import abort
|
from flask import abort
|
||||||
|
from flask import g
|
||||||
from flask import jsonify as flask_jsonify
|
from flask import jsonify as flask_jsonify
|
||||||
from flask import redirect
|
from flask import redirect
|
||||||
from flask import render_template
|
from flask import render_template
|
||||||
|
@ -49,8 +50,10 @@ from core.meta import Box
|
||||||
from core.meta import MetaKey
|
from core.meta import MetaKey
|
||||||
from core.meta import _meta
|
from core.meta import _meta
|
||||||
from core.meta import by_remote_id
|
from core.meta import by_remote_id
|
||||||
|
from core.meta import by_type
|
||||||
from core.meta import in_outbox
|
from core.meta import in_outbox
|
||||||
from core.meta import is_public
|
from core.meta import is_public
|
||||||
|
from core.meta import not_undo
|
||||||
from core.shared import _build_thread
|
from core.shared import _build_thread
|
||||||
from core.shared import _get_ip
|
from core.shared import _get_ip
|
||||||
from core.shared import csrf
|
from core.shared import csrf
|
||||||
|
@ -59,6 +62,7 @@ from core.shared import jsonify
|
||||||
from core.shared import login_required
|
from core.shared import login_required
|
||||||
from core.shared import noindex
|
from core.shared import noindex
|
||||||
from core.shared import paginated_query
|
from core.shared import paginated_query
|
||||||
|
from utils.blacklist import is_blacklisted
|
||||||
from utils.key import get_secret_key
|
from utils.key import get_secret_key
|
||||||
from utils.template_filters import filters
|
from utils.template_filters import filters
|
||||||
|
|
||||||
|
@ -87,14 +91,6 @@ else:
|
||||||
root_logger.setLevel(gunicorn_logger.level)
|
root_logger.setLevel(gunicorn_logger.level)
|
||||||
|
|
||||||
|
|
||||||
def is_blacklisted(url: str) -> bool:
|
|
||||||
try:
|
|
||||||
return urlparse(url).netloc in config.BLACKLIST
|
|
||||||
except Exception:
|
|
||||||
logger.exception(f"failed to blacklist for {url}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
@app.context_processor
|
@app.context_processor
|
||||||
def inject_config():
|
def inject_config():
|
||||||
q = {
|
q = {
|
||||||
|
@ -153,18 +149,24 @@ def inject_config():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.before_request
|
||||||
|
def generate_request_id():
|
||||||
|
g.request_id = uuid4().hex
|
||||||
|
|
||||||
|
|
||||||
@app.after_request
|
@app.after_request
|
||||||
def set_x_powered_by(response):
|
def set_x_powered_by(response):
|
||||||
response.headers["X-Powered-By"] = "microblog.pub"
|
response.headers["X-Powered-By"] = "microblog.pub"
|
||||||
|
response.headers["X-Request-ID"] = g.request_id
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
@app.errorhandler(ValueError)
|
@app.errorhandler(ValueError)
|
||||||
def handle_value_error(error):
|
def handle_value_error(error):
|
||||||
logger.error(
|
logger.error(
|
||||||
f"caught value error: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
f"caught value error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||||
)
|
)
|
||||||
response = flask_jsonify(message=error.args[0])
|
response = flask_jsonify(message=error.args[0], request_id=g.request_id)
|
||||||
response.status_code = 400
|
response.status_code = 400
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -172,9 +174,9 @@ def handle_value_error(error):
|
||||||
@app.errorhandler(Error)
|
@app.errorhandler(Error)
|
||||||
def handle_activitypub_error(error):
|
def handle_activitypub_error(error):
|
||||||
logger.error(
|
logger.error(
|
||||||
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}"
|
f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||||
)
|
)
|
||||||
response = flask_jsonify(error.to_dict())
|
response = flask_jsonify({**error.to_dict(), "request_id": g.request_id})
|
||||||
response.status_code = error.status_code
|
response.status_code = error.status_code
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -182,9 +184,9 @@ def handle_activitypub_error(error):
|
||||||
@app.errorhandler(TaskError)
|
@app.errorhandler(TaskError)
|
||||||
def handle_task_error(error):
|
def handle_task_error(error):
|
||||||
logger.error(
|
logger.error(
|
||||||
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}"
|
f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
|
||||||
)
|
)
|
||||||
response = flask_jsonify({"traceback": error.message})
|
response = flask_jsonify({"traceback": error.message, "request_id": g.request_id})
|
||||||
response.status_code = 500
|
response.status_code = 500
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@ -274,7 +276,6 @@ def index():
|
||||||
"meta.public": True,
|
"meta.public": True,
|
||||||
"$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}],
|
"$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}],
|
||||||
}
|
}
|
||||||
print(list(DB.activities.find(q)))
|
|
||||||
|
|
||||||
pinned = []
|
pinned = []
|
||||||
# Only fetch the pinned notes if we're on the first page
|
# Only fetch the pinned notes if we're on the first page
|
||||||
|
@ -417,7 +418,6 @@ def outbox():
|
||||||
abort(401)
|
abort(401)
|
||||||
|
|
||||||
data = request.get_json(force=True)
|
data = request.get_json(force=True)
|
||||||
print(data)
|
|
||||||
activity = ap.parse_activity(data)
|
activity = ap.parse_activity(data)
|
||||||
activity_id = post_to_outbox(activity)
|
activity_id = post_to_outbox(activity)
|
||||||
|
|
||||||
|
@ -603,32 +603,21 @@ def inbox():
|
||||||
return Response(
|
return Response(
|
||||||
status=422,
|
status=422,
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
response=json.dumps({"error": "failed to decode request as JSON"}),
|
response=json.dumps(
|
||||||
|
{
|
||||||
|
"error": "failed to decode request body as JSON",
|
||||||
|
"request_id": g.request_id,
|
||||||
|
}
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check the blacklist now to see if we can return super early
|
# Check the blacklist now to see if we can return super early
|
||||||
if (
|
if is_blacklisted(data):
|
||||||
"id" in data
|
|
||||||
and is_blacklisted(data["id"])
|
|
||||||
or (
|
|
||||||
"object" in data
|
|
||||||
and isinstance(data["object"], dict)
|
|
||||||
and "id" in data["object"]
|
|
||||||
and is_blacklisted(data["object"]["id"])
|
|
||||||
)
|
|
||||||
or (
|
|
||||||
"object" in data
|
|
||||||
and isinstance(data["object"], str)
|
|
||||||
and is_blacklisted(data["object"])
|
|
||||||
)
|
|
||||||
):
|
|
||||||
logger.info(f"dropping activity from blacklisted host: {data['id']}")
|
logger.info(f"dropping activity from blacklisted host: {data['id']}")
|
||||||
return Response(status=201)
|
return Response(status=201)
|
||||||
|
|
||||||
print(f"req_headers={request.headers}")
|
logger.info(f"request_id={g.request_id} req_headers={request.headers!r}")
|
||||||
print(f"raw_data={data}")
|
logger.info(f"request_id={g.request_id} raw_data={data}")
|
||||||
logger.debug(f"req_headers={request.headers}")
|
|
||||||
logger.debug(f"raw_data={data}")
|
|
||||||
try:
|
try:
|
||||||
if not verify_request(
|
if not verify_request(
|
||||||
request.method, request.path, request.headers, request.data
|
request.method, request.path, request.headers, request.data
|
||||||
|
@ -636,7 +625,7 @@ def inbox():
|
||||||
raise Exception("failed to verify request")
|
raise Exception("failed to verify request")
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception(
|
logger.exception(
|
||||||
"failed to verify request, trying to verify the payload by fetching the remote"
|
f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
remote_data = get_backend().fetch_iri(data["id"])
|
remote_data = get_backend().fetch_iri(data["id"])
|
||||||
|
@ -669,7 +658,8 @@ def inbox():
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
response=json.dumps(
|
response=json.dumps(
|
||||||
{
|
{
|
||||||
"error": "failed to verify request (using HTTP signatures or fetching the IRI)"
|
"error": "failed to verify request (using HTTP signatures or fetching the IRI)",
|
||||||
|
"request_id": g.request_id,
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
@ -688,6 +678,7 @@ def inbox():
|
||||||
"geoip": geoip,
|
"geoip": geoip,
|
||||||
"tb": traceback.format_exc(),
|
"tb": traceback.format_exc(),
|
||||||
"headers": dict(request.headers),
|
"headers": dict(request.headers),
|
||||||
|
"request_id": g.request_id,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -697,16 +688,16 @@ def inbox():
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
response=json.dumps(
|
response=json.dumps(
|
||||||
{
|
{
|
||||||
"error": "failed to verify request (using HTTP signatures or fetching the IRI)"
|
"error": "failed to verify request (using HTTP signatures or fetching the IRI)",
|
||||||
|
"request_id": g.request_id,
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# We fetched the remote data successfully
|
# We fetched the remote data successfully
|
||||||
data = remote_data
|
data = remote_data
|
||||||
print(data)
|
|
||||||
activity = ap.parse_activity(data)
|
activity = ap.parse_activity(data)
|
||||||
logger.debug(f"inbox activity={activity}/{data}")
|
logger.debug(f"inbox activity={g.request_id}/{activity}/{data}")
|
||||||
post_to_inbox(activity)
|
post_to_inbox(activity)
|
||||||
|
|
||||||
return Response(status=201)
|
return Response(status=201)
|
||||||
|
@ -741,7 +732,7 @@ def followers():
|
||||||
|
|
||||||
@app.route("/following")
|
@app.route("/following")
|
||||||
def following():
|
def following():
|
||||||
q = {"box": Box.OUTBOX.value, "type": ActivityType.FOLLOW.value, "meta.undo": False}
|
q = {**in_outbox(), **by_type(ActivityType.FOLLOW), **not_undo()}
|
||||||
|
|
||||||
if is_api_request():
|
if is_api_request():
|
||||||
return jsonify(
|
return jsonify(
|
||||||
|
|
|
@ -196,7 +196,7 @@ def task_finish_post_to_inbox() -> _Response:
|
||||||
except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
|
except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
|
||||||
app.logger.exception(f"no retry")
|
app.logger.exception(f"no retry")
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
app.logger.exception(f"failed to cache attachments for {iri}")
|
app.logger.exception(f"failed to cfinish post to inbox for {iri}")
|
||||||
raise TaskError() from err
|
raise TaskError() from err
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
|
@ -406,6 +406,8 @@ class MicroblogPubBackend(Backend):
|
||||||
new_threads = []
|
new_threads = []
|
||||||
root_reply = in_reply_to
|
root_reply = in_reply_to
|
||||||
reply = ap.fetch_remote_activity(root_reply)
|
reply = ap.fetch_remote_activity(root_reply)
|
||||||
|
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
|
||||||
|
# FIXME(tsileo): can be a 403 too, in this case what to do? not error at least
|
||||||
|
|
||||||
# Ensure the this is a local reply, of a question, with a direct "to" addressing
|
# Ensure the this is a local reply, of a question, with a direct "to" addressing
|
||||||
if (
|
if (
|
||||||
|
@ -427,7 +429,6 @@ class MicroblogPubBackend(Backend):
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
print(f"processing {create!r} and incrementing {in_reply_to}")
|
|
||||||
creply = DB.activities.find_one_and_update(
|
creply = DB.activities.find_one_and_update(
|
||||||
{"activity.object.id": in_reply_to},
|
{"activity.object.id": in_reply_to},
|
||||||
{"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}},
|
{"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}},
|
||||||
|
@ -444,6 +445,7 @@ class MicroblogPubBackend(Backend):
|
||||||
break
|
break
|
||||||
root_reply = in_reply_to
|
root_reply = in_reply_to
|
||||||
reply = ap.fetch_remote_activity(root_reply)
|
reply = ap.fetch_remote_activity(root_reply)
|
||||||
|
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
|
||||||
q = {"activity.object.id": root_reply}
|
q = {"activity.object.id": root_reply}
|
||||||
if not DB.activities.count(q):
|
if not DB.activities.count(q):
|
||||||
save(Box.REPLIES, reply)
|
save(Box.REPLIES, reply)
|
||||||
|
|
|
@ -38,8 +38,9 @@ def process_inbox(activity: ap.BaseActivity, new_meta: _NewMeta) -> None:
|
||||||
def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None:
|
def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None:
|
||||||
_logger.info(f"process_inbox activity={delete!r}")
|
_logger.info(f"process_inbox activity={delete!r}")
|
||||||
obj_id = delete.get_object_id()
|
obj_id = delete.get_object_id()
|
||||||
_logger.debug("delete object={obj_id}")
|
_logger.debug(f"delete object={obj_id}")
|
||||||
try:
|
try:
|
||||||
|
# FIXME(tsileo): call the DB here instead? like for the oubox
|
||||||
obj = ap.fetch_remote_activity(obj_id)
|
obj = ap.fetch_remote_activity(obj_id)
|
||||||
_logger.info(f"inbox_delete handle_replies obj={obj!r}")
|
_logger.info(f"inbox_delete handle_replies obj={obj!r}")
|
||||||
in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None
|
in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None
|
||||||
|
|
38
utils/blacklist.py
Normal file
38
utils/blacklist.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
from typing import Dict
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def is_url_blacklisted(url: str) -> bool:
|
||||||
|
try:
|
||||||
|
return urlparse(url).netloc in config.BLACKLIST
|
||||||
|
except Exception:
|
||||||
|
logger.exception(f"failed to blacklist for {url}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_blacklisted(data: Dict[str, Any]) -> bool:
|
||||||
|
"""Returns True if the activity is coming/or referencing a blacklisted host."""
|
||||||
|
if (
|
||||||
|
"id" in data
|
||||||
|
and is_url_blacklisted(data["id"])
|
||||||
|
or (
|
||||||
|
"object" in data
|
||||||
|
and isinstance(data["object"], dict)
|
||||||
|
and "id" in data["object"]
|
||||||
|
and is_url_blacklisted(data["object"]["id"])
|
||||||
|
)
|
||||||
|
or (
|
||||||
|
"object" in data
|
||||||
|
and isinstance(data["object"], str)
|
||||||
|
and is_url_blacklisted(data["object"])
|
||||||
|
)
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
|
@ -1,6 +1,7 @@
|
||||||
import base64
|
import base64
|
||||||
import mimetypes
|
import mimetypes
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from enum import unique
|
||||||
from gzip import GzipFile
|
from gzip import GzipFile
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
@ -13,7 +14,7 @@ from little_boxes import activitypub as ap
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
def load(url, user_agent):
|
def load(url: str, user_agent: str) -> Image:
|
||||||
"""Initializes a `PIL.Image` from the URL."""
|
"""Initializes a `PIL.Image` from the URL."""
|
||||||
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
|
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
@ -22,7 +23,7 @@ def load(url, user_agent):
|
||||||
return Image.open(BytesIO(resp.raw.read()))
|
return Image.open(BytesIO(resp.raw.read()))
|
||||||
|
|
||||||
|
|
||||||
def to_data_uri(img):
|
def to_data_uri(img: Image) -> str:
|
||||||
out = BytesIO()
|
out = BytesIO()
|
||||||
img.save(out, format=img.format)
|
img.save(out, format=img.format)
|
||||||
out.seek(0)
|
out.seek(0)
|
||||||
|
@ -30,6 +31,7 @@ def to_data_uri(img):
|
||||||
return f"data:{img.get_format_mimetype()};base64,{data}"
|
return f"data:{img.get_format_mimetype()};base64,{data}"
|
||||||
|
|
||||||
|
|
||||||
|
@unique
|
||||||
class Kind(Enum):
|
class Kind(Enum):
|
||||||
ATTACHMENT = "attachment"
|
ATTACHMENT = "attachment"
|
||||||
ACTOR_ICON = "actor_icon"
|
ACTOR_ICON = "actor_icon"
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
from typing import Dict
|
||||||
|
from typing import Set
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import opengraph
|
import opengraph
|
||||||
|
@ -14,7 +17,7 @@ from .lookup import lookup
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def links_from_note(note):
|
def links_from_note(note: Dict[str, Any]) -> Set[str]:
|
||||||
note_host = urlparse(ap._get_id(note["id"]) or "").netloc
|
note_host = urlparse(ap._get_id(note["id"]) or "").netloc
|
||||||
|
|
||||||
links = set()
|
links = set()
|
||||||
|
|
|
@ -170,7 +170,6 @@ def url_or_id(d):
|
||||||
|
|
||||||
@filters.app_template_filter()
|
@filters.app_template_filter()
|
||||||
def get_url(u):
|
def get_url(u):
|
||||||
print(f"GET_URL({u!r})")
|
|
||||||
if isinstance(u, list):
|
if isinstance(u, list):
|
||||||
for l in u:
|
for l in u:
|
||||||
if l.get("mimeType") == "text/html":
|
if l.get("mimeType") == "text/html":
|
||||||
|
@ -191,7 +190,6 @@ def get_actor(url):
|
||||||
url = url[0]
|
url = url[0]
|
||||||
if isinstance(url, dict):
|
if isinstance(url, dict):
|
||||||
url = url.get("id")
|
url = url.get("id")
|
||||||
print(f"GET_ACTOR {url}")
|
|
||||||
try:
|
try:
|
||||||
return ap.get_backend().fetch_iri(url)
|
return ap.get_backend().fetch_iri(url)
|
||||||
except (ActivityNotFoundError, ActivityGoneError):
|
except (ActivityNotFoundError, ActivityGoneError):
|
||||||
|
@ -203,8 +201,6 @@ def get_actor(url):
|
||||||
@filters.app_template_filter()
|
@filters.app_template_filter()
|
||||||
def get_answer_count(choice, obj, meta):
|
def get_answer_count(choice, obj, meta):
|
||||||
count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0)
|
count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0)
|
||||||
print(count_from_meta)
|
|
||||||
print(choice, obj, meta)
|
|
||||||
if count_from_meta:
|
if count_from_meta:
|
||||||
return count_from_meta
|
return count_from_meta
|
||||||
for option in obj.get("oneOf", obj.get("anyOf", [])):
|
for option in obj.get("oneOf", obj.get("anyOf", [])):
|
||||||
|
@ -219,7 +215,6 @@ def get_total_answers_count(obj, meta):
|
||||||
return cached
|
return cached
|
||||||
cnt = 0
|
cnt = 0
|
||||||
for choice in obj.get("anyOf", obj.get("oneOf", [])):
|
for choice in obj.get("anyOf", obj.get("oneOf", [])):
|
||||||
print(choice)
|
|
||||||
cnt += choice.get("replies", {}).get("totalItems", 0)
|
cnt += choice.get("replies", {}).get("totalItems", 0)
|
||||||
return cnt
|
return cnt
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue