From 0f5e35af9765e0f751c3f8a85ea01dab4325047a Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Tue, 30 Jul 2019 22:12:20 +0200 Subject: [PATCH] Start big cleanup --- README.md | 2 +- activity_gc.py | 2 +- api.py | 132 +++++++++ app.py | 559 +++----------------------------------- app_utils.py | 30 ++ utils/meta.py | 2 + utils/notifications.py | 74 ++++- utils/template_filters.py | 328 ++++++++++++++++++++++ 8 files changed, 599 insertions(+), 530 deletions(-) create mode 100644 api.py create mode 100644 app_utils.py create mode 100644 utils/template_filters.py diff --git a/README.md b/README.md index ed7afb4..e4ab81b 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Once the initial configuration is done, you can still tweak the config by editin ### Deployment -To spawn the docker-compose project (running this command will also update _microblog.pub_ to latest and restart the project it it's already running): +To spawn the docker-compose project (running this command will also update _microblog.pub_ to latest and restart everything if it's already running): ```shell $ make run diff --git a/activity_gc.py b/activity_gc.py index 637cd4c..3457868 100644 --- a/activity_gc.py +++ b/activity_gc.py @@ -48,7 +48,7 @@ def threads_of_interest() -> List[str]: return list(out) -def _keep(data: Dict[str, Any]): +def _keep(data: Dict[str, Any]) -> None: DB.activities.update_one({"_id": data["_id"]}, {"$set": {"meta.gc_keep": True}}) diff --git a/api.py b/api.py new file mode 100644 index 0000000..bcd39dd --- /dev/null +++ b/api.py @@ -0,0 +1,132 @@ +from functools import wraps + +import flask +from flask import abort +from flask import current_app as app +from flask import redirect +from flask import request +from flask import session +from itsdangerous import BadSignature +from little_boxes import activitypub as ap +from little_boxes.errors import NotFromOutboxError + +from app_utils import MY_PERSON +from app_utils import csrf +from app_utils import post_to_outbox +from config import ID +from config import JWT +from utils import now + +api = flask.Blueprint("api", __name__) + + +def _api_required() -> None: + if session.get("logged_in"): + if request.method not in ["GET", "HEAD"]: + # If a standard API request is made with a "login session", it must havw a CSRF token + csrf.protect() + return + + # Token verification + token = request.headers.get("Authorization", "").replace("Bearer ", "") + if not token: + # IndieAuth token + token = request.form.get("access_token", "") + + # Will raise a BadSignature on bad auth + payload = JWT.loads(token) + app.logger.info(f"api call by {payload}") + + +def api_required(f): + @wraps(f) + def decorated_function(*args, **kwargs): + try: + _api_required() + except BadSignature: + abort(401) + + return f(*args, **kwargs) + + return decorated_function + + +def _user_api_arg(key: str, **kwargs): + """Try to get the given key from the requests, try JSON body, form data and query arg.""" + if request.is_json: + oid = request.json.get(key) + else: + oid = request.args.get(key) or request.form.get(key) + + if not oid: + if "default" in kwargs: + app.logger.info(f'{key}={kwargs.get("default")}') + return kwargs.get("default") + + raise ValueError(f"missing {key}") + + app.logger.info(f"{key}={oid}") + return oid + + +def _user_api_get_note(from_outbox: bool = False): + oid = _user_api_arg("id") + app.logger.info(f"fetching {oid}") + note = ap.parse_activity(ap.get_backend().fetch_iri(oid)) + if from_outbox and not note.id.startswith(ID): + raise NotFromOutboxError( + 
f"cannot load {note.id}, id must be owned by the server" + ) + + return note + + +def _user_api_response(**kwargs): + _redirect = _user_api_arg("redirect", default=None) + if _redirect: + return redirect(_redirect) + + resp = flask.jsonify(**kwargs) + resp.status_code = 201 + return resp + + +@api.route("/note/delete", methods=["POST"]) +@api_required +def api_delete(): + """API endpoint to delete a Note activity.""" + note = _user_api_get_note(from_outbox=True) + + # Create the delete, same audience as the Create object + delete = ap.Delete( + actor=ID, + object=ap.Tombstone(id=note.id).to_dict(embed=True), + to=note.to, + cc=note.cc, + published=now(), + ) + + delete_id = post_to_outbox(delete) + + return _user_api_response(activity=delete_id) + + +@api.route("/boost", methods=["POST"]) +@api_required +def api_boost(): + note = _user_api_get_note() + + # Ensures the note visibility allow us to build an Announce (in respect to the post visibility) + if ap.get_visibility(note) not in [ap.Visibility.PUBLIC, ap.Visibility.UNLISTED]: + abort(400) + + announce = ap.Announce( + actor=MY_PERSON.id, + object=note.id, + to=[MY_PERSON.followers, note.attributedTo], + cc=[ap.AS_PUBLIC], + published=now(), + ) + announce_id = post_to_outbox(announce) + + return _user_api_response(activity=announce_id) diff --git a/app.py b/app.py index d22bd30..7739c0b 100644 --- a/app.py +++ b/app.py @@ -4,7 +4,6 @@ import logging import mimetypes import os import traceback -import urllib from datetime import datetime from datetime import timedelta from datetime import timezone @@ -12,12 +11,9 @@ from functools import wraps from io import BytesIO from typing import Any from typing import Dict -from typing import Optional -from typing import Tuple from urllib.parse import urlencode from urllib.parse import urlparse -import bleach import emoji_unicode import html2text import mf2py @@ -34,18 +30,15 @@ from flask import render_template from flask import request from flask import session from flask import url_for -from flask_wtf.csrf import CSRFProtect from itsdangerous import BadSignature from little_boxes import activitypub as ap from little_boxes.activitypub import ActivityType -from little_boxes.activitypub import _to_list from little_boxes.activitypub import clean_activity from little_boxes.activitypub import format_datetime from little_boxes.activitypub import get_backend from little_boxes.content_helper import parse_markdown from little_boxes.errors import ActivityGoneError from little_boxes.errors import ActivityNotFoundError -from little_boxes.errors import BadActivityError from little_boxes.errors import Error from little_boxes.errors import NotAnActivityError from little_boxes.errors import NotFromOutboxError @@ -64,13 +57,16 @@ import config from activitypub import Box from activitypub import _answer_key from activitypub import embed_collection +from api import api +from app_utils import MY_PERSON +from app_utils import back +from app_utils import csrf from config import ADMIN_API_KEY from config import BASE_URL from config import BLACKLIST from config import DB from config import DEBUG_MODE from config import DOMAIN -from config import EMOJI_TPL from config import EMOJIS from config import HEADERS from config import ICON_URL @@ -91,11 +87,10 @@ from poussetaches import PousseTaches from tasks import Tasks from utils import now from utils import opengraph -from utils import parse_datetime from utils.key import get_secret_key from utils.lookup import lookup -from utils.media import Kind from 
utils.notifications import set_inbox_flags +from utils.template_filters import filters p = PousseTaches( os.getenv("MICROBLOGPUB_POUSSETACHES_HOST", "http://localhost:7991"), @@ -104,15 +99,12 @@ p = PousseTaches( # p = PousseTaches("http://localhost:7991", "http://localhost:5000") -back = activitypub.MicroblogPubBackend() -ap.use_backend(back) - -MY_PERSON = ap.Person(**ME) - app = Flask(__name__) app.secret_key = get_secret_key("flask") +app.register_blueprint(filters) +app.register_blueprint(api, url_prefix="/api") app.config.update(WTF_CSRF_CHECK_DEFAULT=False) -csrf = CSRFProtect(app) +csrf.init_app(app) logger = logging.getLogger(__name__) @@ -129,10 +121,6 @@ else: SIG_AUTH = HTTPSigAuth(KEY) -H2T = html2text.HTML2Text() -H2T.ignore_links = True -H2T.ignore_images = True - def is_blacklisted(url: str) -> bool: try: @@ -210,301 +198,6 @@ def set_x_powered_by(response): return response -# HTML/templates helper -ALLOWED_TAGS = [ - "a", - "abbr", - "acronym", - "b", - "br", - "blockquote", - "code", - "pre", - "em", - "i", - "li", - "ol", - "strong", - "ul", - "span", - "div", - "p", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", -] - - -def clean_html(html): - try: - return bleach.clean(html, tags=ALLOWED_TAGS) - except Exception: - return "" - - -_GRIDFS_CACHE: Dict[Tuple[Kind, str, Optional[int]], str] = {} - - -def _get_file_url(url, size, kind): - k = (kind, url, size) - cached = _GRIDFS_CACHE.get(k) - if cached: - return cached - - doc = MEDIA_CACHE.get_file(url, size, kind) - if doc: - u = f"/media/{str(doc._id)}" - _GRIDFS_CACHE[k] = u - return u - - # MEDIA_CACHE.cache(url, kind) - app.logger.error(f"cache not available for {url}/{size}/{kind}") - return url - - -@app.template_filter() -def visibility(v: str) -> str: - try: - return ap.Visibility[v].value.lower() - except Exception: - return v - - -@app.template_filter() -def visibility_is_public(v: str) -> bool: - return v in [ap.Visibility.PUBLIC.name, ap.Visibility.UNLISTED.name] - - -@app.template_filter() -def emojify(text): - return emoji_unicode.replace( - text, lambda e: EMOJI_TPL.format(filename=e.code_points, raw=e.unicode) - ) - - -@app.template_filter() -def gtone(n): - return n > 1 - - -@app.template_filter() -def gtnow(dtstr): - return format_datetime(datetime.now(timezone.utc)) > dtstr - - -@app.template_filter() -def remove_mongo_id(dat): - if isinstance(dat, list): - return [remove_mongo_id(item) for item in dat] - if "_id" in dat: - dat["_id"] = str(dat["_id"]) - for k, v in dat.items(): - if isinstance(v, dict): - dat[k] = remove_mongo_id(dat[k]) - return dat - - -@app.template_filter() -def get_video_link(data): - for link in data: - if link.get("mimeType", "").startswith("video/"): - return link.get("href") - return None - - -@app.template_filter() -def get_actor_icon_url(url, size): - return _get_file_url(url, size, Kind.ACTOR_ICON) - - -@app.template_filter() -def get_attachment_url(url, size): - return _get_file_url(url, size, Kind.ATTACHMENT) - - -@app.template_filter() -def get_og_image_url(url, size=100): - try: - return _get_file_url(url, size, Kind.OG_IMAGE) - except Exception: - return "" - - -@app.template_filter() -def permalink_id(val): - return str(hash(val)) - - -@app.template_filter() -def quote_plus(t): - return urllib.parse.quote_plus(t) - - -@app.template_filter() -def is_from_outbox(t): - return t.startswith(ID) - - -@app.template_filter() -def clean(html): - out = clean_html(html) - return emoji_unicode.replace( - out, lambda e: EMOJI_TPL.format(filename=e.code_points, raw=e.unicode) - 
) - - -@app.template_filter() -def html2plaintext(body): - return H2T.handle(body) - - -@app.template_filter() -def domain(url): - return urlparse(url).netloc - - -@app.template_filter() -def url_or_id(d): - if isinstance(d, dict): - if "url" in d: - return d["url"] - else: - return d["id"] - return "" - - -@app.template_filter() -def get_url(u): - print(f"GET_URL({u!r})") - if isinstance(u, list): - for l in u: - if l.get("mimeType") == "text/html": - u = l - if isinstance(u, dict): - return u["href"] - elif isinstance(u, str): - return u - else: - return u - - -@app.template_filter() -def get_actor(url): - if not url: - return None - if isinstance(url, list): - url = url[0] - if isinstance(url, dict): - url = url.get("id") - print(f"GET_ACTOR {url}") - try: - return get_backend().fetch_iri(url) - except (ActivityNotFoundError, ActivityGoneError): - return f"Deleted<{url}>" - except Exception as exc: - return f"Error<{url}/{exc!r}>" - - -@app.template_filter() -def format_time(val): - if val: - dt = parse_datetime(val) - return datetime.strftime(dt, "%B %d, %Y, %H:%M %p") - return val - - -@app.template_filter() -def format_ts(val): - return datetime.fromtimestamp(val).strftime("%B %d, %Y, %H:%M %p") - - -@app.template_filter() -def gt_ts(val): - return datetime.now() > datetime.fromtimestamp(val) - - -@app.template_filter() -def format_timeago(val): - if val: - dt = parse_datetime(val) - return timeago.format(dt.astimezone(timezone.utc), datetime.now(timezone.utc)) - return val - - -@app.template_filter() -def has_type(doc, _types): - for _type in _to_list(_types): - if _type in _to_list(doc["type"]): - return True - return False - - -@app.template_filter() -def has_actor_type(doc): - # FIXME(tsileo): skipping the last one "Question", cause Mastodon sends question restuls as an update coming from - # the question... Does Pleroma do that too? 
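Every `@app.template_filter()` helper deleted in this region (from `clean` down through `has_actor_type`) is re-registered later in this patch in the new `utils/template_filters.py`, using a blueprint so the filters stay application-wide. A minimal sketch of that registration pattern; the `shout` filter here is hypothetical, not one of the real filters:

```python
# Sketch of the blueprint-based filter registration used by utils/template_filters.py.
# `shout` is a made-up example filter, only the pattern matches the patch.
import flask

filters = flask.Blueprint("filters", __name__)


@filters.app_template_filter()
def shout(text: str) -> str:
    # After `app.register_blueprint(filters)` (see the app.py hunk above),
    # templates can use it like any built-in filter: {{ note.content | shout }}.
    return text.upper()
```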
- for t in ap.ACTOR_TYPES[:-1]: - if has_type(doc, t.value): - return True - return False - - -def _is_img(filename): - filename = filename.lower() - if ( - filename.endswith(".png") - or filename.endswith(".jpg") - or filename.endswith(".jpeg") - or filename.endswith(".gif") - or filename.endswith(".svg") - ): - return True - return False - - -@app.template_filter() -def not_only_imgs(attachment): - for a in attachment: - if isinstance(a, dict) and not _is_img(a["url"]): - return True - if isinstance(a, str) and not _is_img(a): - return True - return False - - -@app.template_filter() -def is_img(filename): - return _is_img(filename) - - -@app.template_filter() -def get_answer_count(choice, obj, meta): - count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0) - print(count_from_meta) - print(choice, obj, meta) - if count_from_meta: - return count_from_meta - for option in obj.get("oneOf", obj.get("anyOf", [])): - if option.get("name") == choice: - return option.get("replies", {}).get("totalItems", 0) - - -@app.template_filter() -def get_total_answers_count(obj, meta): - cached = meta.get("question_replies", 0) - if cached: - return cached - cnt = 0 - print("OKI", obj) - for choice in obj.get("anyOf", obj.get("oneOf", [])): - print(choice) - cnt += choice.get("replies", {}).get("totalItems", 0) - return cnt - - def add_response_headers(headers={}): """This decorator adds the headers passed in to the response""" @@ -875,44 +568,10 @@ def paginated_query(db, q, limit=25, sort_key="_id"): return outbox_data, older_than, newer_than -CACHING = True - - -def _get_cached(type_="html", arg=None): - if not CACHING: - return None - logged_in = session.get("logged_in") - if not logged_in: - cached = DB.cache2.find_one({"path": request.path, "type": type_, "arg": arg}) - if cached: - app.logger.info("from cache") - return cached["response_data"] - return None - - -def _cache(resp, type_="html", arg=None): - if not CACHING: - return None - logged_in = session.get("logged_in") - if not logged_in: - DB.cache2.update_one( - {"path": request.path, "type": type_, "arg": arg}, - {"$set": {"response_data": resp, "date": datetime.now(timezone.utc)}}, - upsert=True, - ) - return None - - @app.route("/") def index(): if is_api_request(): return jsonify(**ME) - cache_arg = ( - f"{request.args.get('older_than', '')}:{request.args.get('newer_than', '')}" - ) - cached = _get_cached("html", cache_arg) - if cached: - return cached q = { "box": Box.OUTBOX.value, @@ -949,7 +608,6 @@ def index(): newer_than=newer_than, pinned=pinned, ) - _cache(resp, "html", cache_arg) return resp @@ -1109,39 +767,33 @@ def note_by_id(note_id): @app.route("/nodeinfo") def nodeinfo(): - response = _get_cached("api") - cached = True - if not response: - cached = False - q = { - "box": Box.OUTBOX.value, - "meta.deleted": False, - "type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]}, + q = { + "box": Box.OUTBOX.value, + "meta.deleted": False, + "type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]}, + } + + response = json.dumps( + { + "version": "2.1", + "software": { + "name": "microblogpub", + "version": f"{VERSION}", + "repository": "https://github.com/tsileo/microblog.pub", + }, + "protocols": ["activitypub"], + "services": {"inbound": [], "outbound": []}, + "openRegistrations": False, + "usage": {"users": {"total": 1}, "localPosts": DB.activities.count(q)}, + "metadata": { + "sourceCode": "https://github.com/tsileo/microblog.pub", + "nodeName": f"@{USERNAME}@{DOMAIN}", + 
"version": VERSION, + "version_date": VERSION_DATE, + }, } + ) - response = json.dumps( - { - "version": "2.1", - "software": { - "name": "microblogpub", - "version": f"{VERSION}", - "repository": "https://github.com/tsileo/microblog.pub", - }, - "protocols": ["activitypub"], - "services": {"inbound": [], "outbound": []}, - "openRegistrations": False, - "usage": {"users": {"total": 1}, "localPosts": DB.activities.count(q)}, - "metadata": { - "sourceCode": "https://github.com/tsileo/microblog.pub", - "nodeName": f"@{USERNAME}@{DOMAIN}", - "version": VERSION, - "version_date": VERSION_DATE, - }, - } - ) - - if not cached: - _cache(response, "api") return Response( headers={ "Content-Type": "application/json; profile=http://nodeinfo.diaspora.software/ns/schema/2.1#" @@ -1735,47 +1387,6 @@ def _user_api_response(**kwargs): return resp -@app.route("/api/note/delete", methods=["POST"]) -@api_required -def api_delete(): - """API endpoint to delete a Note activity.""" - note = _user_api_get_note(from_outbox=True) - - # Create the delete, same audience as the Create object - delete = ap.Delete( - actor=ID, - object=ap.Tombstone(id=note.id).to_dict(embed=True), - to=note.to, - cc=note.cc, - published=now(), - ) - - delete_id = post_to_outbox(delete) - - return _user_api_response(activity=delete_id) - - -@app.route("/api/boost", methods=["POST"]) -@api_required -def api_boost(): - note = _user_api_get_note() - - # Ensures the note visibility allow us to build an Announce (in respect to the post visibility) - if ap.get_visibility(note) not in [ap.Visibility.PUBLIC, ap.Visibility.UNLISTED]: - abort(400) - - announce = ap.Announce( - actor=MY_PERSON.id, - object=note.id, - to=[MY_PERSON.followers, note.attributedTo], - cc=[ap.AS_PUBLIC], - published=now(), - ) - announce_id = post_to_outbox(announce) - - return _user_api_response(activity=announce_id) - - @app.route("/api/mark_notifications_as_read", methods=["POST"]) @api_required def api_mark_notification_as_read(): @@ -3124,118 +2735,20 @@ def task_process_new_activity(): activity = ap.fetch_remote_activity(iri) app.logger.info(f"activity={activity!r}") - # Is the activity expected? 
- # following = ap.get_backend().following() - should_forward = False - should_delete = False - should_keep = False - flags = {} if not activity.published: flags[_meta(MetaKey.PUBLISHED)] = now() + else: + flags[_meta(MetaKey.PUBLISHED)] = activity.published set_inbox_flags(activity, flags) app.logger.info(f"a={activity}, flags={flags!r}") if flags: DB.activities.update_one({"remote_id": activity.id}, {"$set": flags}) - tag_stream = False - if activity.has_type(ap.ActivityType.ANNOUNCE): - # FIXME(tsileo): Ensure it's follower and store into a "dead activities" DB - try: - activity.get_object() - tag_stream = True - if activity.get_object_id().startswith(BASE_URL): - should_keep = True - except (NotAnActivityError, BadActivityError): - app.logger.exception(f"failed to get announce object for {activity!r}") - # Most likely on OStatus notice - tag_stream = False - should_delete = True - except (ActivityGoneError, ActivityNotFoundError): - # The announced activity is deleted/gone, drop it - should_delete = True - - elif activity.has_type(ap.ActivityType.FOLLOW): - # FIXME(tsileo): ensure it's a follow where the server is the object - should_keep = True - - elif activity.has_type(ap.ActivityType.CREATE): - note = activity.get_object() - in_reply_to = note.get_in_reply_to() - # Make the note part of the stream if it's not a reply, or if it's a local reply **and** it's not a poll - # answer - # FIXME(tsileo): this will block "regular replies" to a Poll, maybe the adressing will help make the - # difference? - if not in_reply_to or ( - in_reply_to.startswith(ID) - and not note.has_type(ap.ActivityType.QUESTION) - ): - tag_stream = True - - # FIXME(tsileo): check for direct addressing in the to, cc, bcc... fields - if (in_reply_to and in_reply_to.startswith(ID)) or note.has_mention(ID): - should_keep = True - - if in_reply_to: - try: - reply = ap.fetch_remote_activity(note.get_in_reply_to()) - if ( - reply.id.startswith(ID) or reply.has_mention(ID) - ) and activity.is_public(): - # The reply is public "local reply", forward the reply (i.e. the original activity) to the - # original recipients - should_forward = True - should_keep = True - except NotAnActivityError: - # Most likely a reply to an OStatus notce - should_delete = True - - # (partial) Ghost replies handling - # [X] This is the first time the server has seen this Activity. - should_forward = False - local_followers = ID + "/followers" - for field in ["to", "cc"]: - if field in activity._data: - if local_followers in activity._data[field]: - # [X] The values of to, cc, and/or audience contain a Collection owned by the server. 
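All of the inline `should_keep` / `should_forward` / `tag_stream` bookkeeping removed in this hunk is superseded by the `set_inbox_flags` single-dispatch helpers added in `utils/notifications.py` further down: the task now just collects `meta.*` flags into a dict and persists them with a single `update_one`. A rough sketch of that pattern, with literal flag names standing in for the real `_meta(MetaKey.*)` keys:

```python
# Simplified sketch; the real per-type handlers live in utils/notifications.py
# and the caller is the new task_process_new_activity body below.
from functools import singledispatch
from typing import Any, Dict

from little_boxes import activitypub as ap


@singledispatch
def set_inbox_flags(activity: ap.BaseActivity, new_meta: Dict[str, Any]) -> None:
    # Unknown activity types get no extra flags.
    return None


@set_inbox_flags.register
def _announce_flags(activity: ap.Announce, new_meta: Dict[str, Any]) -> None:
    # The real code goes through _set_flag()/_meta(MetaKey.STREAM) etc.;
    # plain string keys are used here to keep the sketch short.
    new_meta.update({"meta.stream": True, "meta.notification": True})


# Caller side, mirroring the new task code:
#   flags: Dict[str, Any] = {}
#   set_inbox_flags(activity, flags)
#   if flags:
#       DB.activities.update_one({"remote_id": activity.id}, {"$set": flags})
```

Note that the forwarding decision itself (`Tasks.forward_activity`) is simply dropped from this task rather than re-expressed through the new flags.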
- should_forward = True - - # [X] The values of inReplyTo, object, target and/or tag are objects owned by the server - if not (in_reply_to and in_reply_to.startswith(ID)): - should_forward = False - - elif activity.has_type(ap.ActivityType.DELETE): - note = DB.activities.find_one( - {"activity.object.id": activity.get_object_id()} - ) - if note and note["meta"].get("forwarded", False): - # If the activity was originally forwarded, forward the delete too - should_forward = True - - if should_forward: - app.logger.info(f"will forward {activity!r} to followers") - Tasks.forward_activity(activity.id) - - if should_delete: - app.logger.info(f"will soft delete {activity!r}") - - app.logger.info(f"{iri} tag_stream={tag_stream}") - DB.activities.update_one( - {"remote_id": activity.id}, - { - "$set": { - "meta.keep": should_keep, - "meta.stream": tag_stream, - "meta.forwarded": should_forward, - "meta.deleted": should_delete, - } - }, - ) - app.logger.info(f"new activity {iri} processed") - if not should_delete and not activity.has_type(ap.ActivityType.DELETE): + if not activity.has_type(ap.ActivityType.DELETE): Tasks.cache_actor(iri) except (ActivityGoneError, ActivityNotFoundError): app.logger.exception(f"dropping activity {iri}, skip processing") diff --git a/app_utils.py b/app_utils.py new file mode 100644 index 0000000..15b4c82 --- /dev/null +++ b/app_utils.py @@ -0,0 +1,30 @@ +from flask_wtf.csrf import CSRFProtect +from little_boxes import activitypub as ap + +import activitypub +from activitypub import Box +from config import me +from tasks import Tasks + +csrf = CSRFProtect() + + +back = activitypub.MicroblogPubBackend() +ap.use_backend(back) + +MY_PERSON = ap.Person(**ME) + + +def post_to_outbox(activity: ap.BaseActivity) -> str: + if activity.has_type(ap.CREATE_TYPES): + activity = activity.build_create() + + # Assign create a random ID + obj_id = back.random_object_id() + + activity.set_id(back.activity_url(obj_id), obj_id) + + back.save(Box.OUTBOX, activity) + Tasks.cache_actor(activity.id) + Tasks.finish_post_to_outbox(activity.id) + return activity.id diff --git a/utils/meta.py b/utils/meta.py index d3381a6..7780003 100644 --- a/utils/meta.py +++ b/utils/meta.py @@ -20,6 +20,8 @@ class MetaKey(Enum): NOTIFICATION = "notification" NOTIFICATION_UNREAD = "notification_unread" NOTIFICATION_FOLLOWS_BACK = "notification_follows_back" + POLL_ANSWER = "poll_answer" + STREAM = "stream" ACTOR_ID = "actor_id" UNDO = "undo" PUBLISHED = "published" diff --git a/utils/notifications.py b/utils/notifications.py index a984f34..e3888f6 100644 --- a/utils/notifications.py +++ b/utils/notifications.py @@ -2,6 +2,7 @@ import logging from functools import singledispatch from typing import Any from typing import Dict +from urllib.parse import urlparse from little_boxes import activitypub as ap @@ -19,11 +20,17 @@ _logger = logging.getLogger(__name__) _NewMeta = Dict[str, Any] +_LOCAL_NETLOC = urlparse(BASE_URL).netloc + def _is_from_outbox(activity: ap.BaseActivity) -> bool: return activity.id.startswith(BASE_URL) +def _is_local(url: str) -> bool: + return urlparse(url).netloc == _LOCAL_NETLOC + + def _flag_as_notification(activity: ap.BaseActivity, new_meta: _NewMeta) -> None: new_meta.update( {_meta(MetaKey.NOTIFICATION): True, _meta(MetaKey.NOTIFICATION_UNREAD): True} @@ -31,8 +38,14 @@ def _flag_as_notification(activity: ap.BaseActivity, new_meta: _NewMeta) -> None return None +def _set_flag(meta: _NewMeta, meta_key: MetaKey, value: Any = True) -> None: + meta.update({_meta(meta_key): value}) + return 
None + + @singledispatch def set_inbox_flags(activity: ap.BaseActivity, new_meta: _NewMeta) -> None: + _logger.warning(f"skipping {activity!r}") return None @@ -58,13 +71,15 @@ def _accept_set_inbox_flags(activity: ap.Accept, new_meta: _NewMeta) -> None: # This Accept will be a "You started following $actor" notification _flag_as_notification(activity, new_meta) - new_meta.update({_meta(MetaKey.NOTIFICATION_FOLLOWS_BACK): follows_back}) + _set_flag(new_meta, MetaKey.GC_KEEP) + _set_flag(new_meta, MetaKey.NOTIFICATION_FOLLOWS_BACK, follows_back) return None @set_inbox_flags.register def _follow_set_inbox_flags(activity: ap.Follow, new_meta: _NewMeta) -> None: """Handle notification for new followers.""" + _logger.info(f"set_inbox_flags activity={activity!r}") # Check if we're already following this actor follows_back = False accept_query = { @@ -83,12 +98,14 @@ def _follow_set_inbox_flags(activity: ap.Follow, new_meta: _NewMeta) -> None: # This Follow will be a "$actor started following you" notification _flag_as_notification(activity, new_meta) - new_meta.update({_meta(MetaKey.NOTIFICATION_FOLLOWS_BACK): follows_back}) + _set_flag(new_meta, MetaKey.GC_KEEP) + _set_flag(new_meta, MetaKey.NOTIFICATION_FOLLOWS_BACK, follows_back) return None @set_inbox_flags.register def _like_set_inbox_flags(activity: ap.Like, new_meta: _NewMeta) -> None: + _logger.info(f"set_inbox_flags activity={activity!r}") # Is it a Like of local acitivty/from the outbox if _is_from_outbox(activity.get_object()): # Flag it as a notification @@ -98,29 +115,33 @@ def _like_set_inbox_flags(activity: ap.Like, new_meta: _NewMeta) -> None: Tasks.cache_object(activity.id) # Also set the "keep mark" for the GC (as we want to keep it forever) - new_meta.update({_meta(MetaKey.GC_KEEP): True}) + _set_flag(new_meta, MetaKey.GC_KEEP) return None @set_inbox_flags.register def _announce_set_inbox_flags(activity: ap.Announce, new_meta: _NewMeta) -> None: + _logger.info(f"set_inbox_flags activity={activity!r}") # Is it a Like of local acitivty/from the outbox if _is_from_outbox(activity.get_object()): # Flag it as a notification _flag_as_notification(activity, new_meta) # Also set the "keep mark" for the GC (as we want to keep it forever) - new_meta.update({_meta(MetaKey.GC_KEEP): True}) + _set_flag(new_meta, MetaKey.GC_KEEP) # Cache the object in all case (for display on the notifcation page **and** the stream page) Tasks.cache_object(activity.id) + # Display it in the stream + _set_flag(new_meta, MetaKey.STREAM) return None @set_inbox_flags.register def _undo_set_inbox_flags(activity: ap.Undo, new_meta: _NewMeta) -> None: + _logger.info(f"set_inbox_flags activity={activity!r}") obj = activity.get_object() if obj.has_type(ap.ActivityType.FOLLOW): @@ -128,6 +149,49 @@ def _undo_set_inbox_flags(activity: ap.Undo, new_meta: _NewMeta) -> None: _flag_as_notification(activity, new_meta) # Also set the "keep mark" for the GC (as we want to keep it forever) - new_meta.update({_meta(MetaKey.GC_KEEP): True}) + _set_flag(new_meta, MetaKey.GC_KEEP) + + return None + + +@set_inbox_flags.register +def _create_set_inbox_flags(activity: ap.Create, new_meta: _NewMeta) -> None: + _logger.info(f"set_inbox_flags activity={activity!r}") + obj = activity.get_object() + + _set_flag(new_meta, MetaKey.POLL_ANSWER, False) + + in_reply_to = obj.get_in_reply_to() + + # Check if it's a local reply + if in_reply_to and _is_local(in_reply_to): + # TODO(tsileo): fetch the reply to check for poll answers more precisely + # reply_of = 
ap.fetch_remote_activity(in_reply_to) + + # Ensure it's not a poll answer + if obj.name and not obj.content: + _set_flag(new_meta, MetaKey.POLL_ANSWER) + return None + + # Flag it as a notification + _flag_as_notification(activity, new_meta) + + # Also set the "keep mark" for the GC (as we want to keep it forever) + _set_flag(new_meta, MetaKey.GC_KEEP) + + return None + + # Check for mention + for mention in obj.get_mentions(): + if mention.href and _is_local(mention.href): + # Flag it as a notification + _flag_as_notification(activity, new_meta) + + # Also set the "keep mark" for the GC (as we want to keep it forever) + _set_flag(new_meta, MetaKey.GC_KEEP) + + if not in_reply_to: + # A good candidate for displaying in the stream + _set_flag(new_meta, MetaKey.STREAM) return None diff --git a/utils/template_filters.py b/utils/template_filters.py new file mode 100644 index 0000000..41a8163 --- /dev/null +++ b/utils/template_filters.py @@ -0,0 +1,328 @@ +import logging +import urllib +from datetime import datetime +from datetime import timezone +from typing import Dict +from typing import Optional +from typing import Tuple +from urllib.parse import urlparse + +import bleach +import emoji_unicode +import flask +import html2text +import timeago +from little_boxes import activitypub as ap +from little_boxes.activitypub import _to_list +from little_boxes.errors import ActivityGoneError +from little_boxes.errors import ActivityNotFoundError + +from activitypub import _answer_key +from config import EMOJI_TPL +from config import ID +from config import MEDIA_CACHE +from utils import parse_datetime +from utils.media import Kind + +_logger = logging.getLogger(__name__) + +H2T = html2text.HTML2Text() +H2T.ignore_links = True +H2T.ignore_images = True + + +filters = flask.Blueprint('filters', __name__) + + +@filters.app_template_filter() +def visibility(v: str) -> str: + try: + return ap.Visibility[v].value.lower() + except Exception: + return v + + +@filters.app_template_filter() +def visibility_is_public(v: str) -> bool: + return v in [ap.Visibility.PUBLIC.name, ap.Visibility.UNLISTED.name] + + +@filters.app_template_filter() +def emojify(text): + return emoji_unicode.replace( + text, lambda e: EMOJI_TPL.format(filename=e.code_points, raw=e.unicode) + ) + + +# HTML/templates helper +ALLOWED_TAGS = [ + "a", + "abbr", + "acronym", + "b", + "br", + "blockquote", + "code", + "pre", + "em", + "i", + "li", + "ol", + "strong", + "ul", + "span", + "div", + "p", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", +] + + +def clean_html(html): + try: + return bleach.clean(html, tags=ALLOWED_TAGS) + except Exception: + return "" + + +@filters.app_template_filter() +def gtone(n): + return n > 1 + + +@filters.app_template_filter() +def gtnow(dtstr): + return ap.format_datetime(datetime.now(timezone.utc)) > dtstr + + +@filters.app_template_filter() +def clean(html): + out = clean_html(html) + return emoji_unicode.replace( + out, lambda e: EMOJI_TPL.format(filename=e.code_points, raw=e.unicode) + ) + + +@filters.app_template_filter() +def permalink_id(val): + return str(hash(val)) + + +@filters.app_template_filter() +def quote_plus(t): + return urllib.parse.quote_plus(t) + + +@filters.app_template_filter() +def is_from_outbox(t): + return t.startswith(ID) + + +@filters.app_template_filter() +def html2plaintext(body): + return H2T.handle(body) + + +@filters.app_template_filter() +def domain(url): + return urlparse(url).netloc + + +@filters.app_template_filter() +def format_time(val): + if val: + dt = parse_datetime(val) 
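+        # NOTE: "%B %d, %Y, %H:%M %p" combines the 24-hour %H with an AM/PM
+        # marker (%p); "%I:%M %p" would be the 12-hour form. format_ts below
+        # reuses the same format string.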
+ return datetime.strftime(dt, "%B %d, %Y, %H:%M %p") + return val + + +@filters.app_template_filter() +def format_ts(val): + return datetime.fromtimestamp(val).strftime("%B %d, %Y, %H:%M %p") + + +@filters.app_template_filter() +def gt_ts(val): + return datetime.now() > datetime.fromtimestamp(val) + + +@filters.app_template_filter() +def format_timeago(val): + if val: + dt = parse_datetime(val) + return timeago.format(dt.astimezone(timezone.utc), datetime.now(timezone.utc)) + return val + + +@filters.app_template_filter() +def url_or_id(d): + if isinstance(d, dict): + if "url" in d: + return d["url"] + else: + return d["id"] + return "" + + +@filters.app_template_filter() +def get_url(u): + print(f"GET_URL({u!r})") + if isinstance(u, list): + for l in u: + if l.get("mimeType") == "text/html": + u = l + if isinstance(u, dict): + return u["href"] + elif isinstance(u, str): + return u + else: + return u + + +@filters.app_template_filter() +def get_actor(url): + if not url: + return None + if isinstance(url, list): + url = url[0] + if isinstance(url, dict): + url = url.get("id") + print(f"GET_ACTOR {url}") + try: + return ap.get_backend().fetch_iri(url) + except (ActivityNotFoundError, ActivityGoneError): + return f"Deleted<{url}>" + except Exception as exc: + return f"Error<{url}/{exc!r}>" + + +@filters.app_template_filter() +def get_answer_count(choice, obj, meta): + count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0) + print(count_from_meta) + print(choice, obj, meta) + if count_from_meta: + return count_from_meta + for option in obj.get("oneOf", obj.get("anyOf", [])): + if option.get("name") == choice: + return option.get("replies", {}).get("totalItems", 0) + + +@filters.app_template_filter() +def get_total_answers_count(obj, meta): + cached = meta.get("question_replies", 0) + if cached: + return cached + cnt = 0 + for choice in obj.get("anyOf", obj.get("oneOf", [])): + print(choice) + cnt += choice.get("replies", {}).get("totalItems", 0) + return cnt + + +_GRIDFS_CACHE: Dict[Tuple[Kind, str, Optional[int]], str] = {} + + +def _get_file_url(url, size, kind): + k = (kind, url, size) + cached = _GRIDFS_CACHE.get(k) + if cached: + return cached + + doc = MEDIA_CACHE.get_file(url, size, kind) + if doc: + u = f"/media/{str(doc._id)}" + _GRIDFS_CACHE[k] = u + return u + + # MEDIA_CACHE.cache(url, kind) + _logger.error(f"cache not available for {url}/{size}/{kind}") + return url + + +@filters.app_template_filter() +def get_actor_icon_url(url, size): + return _get_file_url(url, size, Kind.ACTOR_ICON) + + +@filters.app_template_filter() +def get_attachment_url(url, size): + return _get_file_url(url, size, Kind.ATTACHMENT) + + +@filters.app_template_filter() +def get_og_image_url(url, size=100): + try: + return _get_file_url(url, size, Kind.OG_IMAGE) + except Exception: + return "" + + +@filters.app_template_filter() +def remove_mongo_id(dat): + if isinstance(dat, list): + return [remove_mongo_id(item) for item in dat] + if "_id" in dat: + dat["_id"] = str(dat["_id"]) + for k, v in dat.items(): + if isinstance(v, dict): + dat[k] = remove_mongo_id(dat[k]) + return dat + + +@filters.app_template_filter() +def get_video_link(data): + for link in data: + if link.get("mimeType", "").startswith("video/"): + return link.get("href") + return None + + +@filters.app_template_filter() +def has_type(doc, _types): + for _type in _to_list(_types): + if _type in _to_list(doc["type"]): + return True + return False + + +@filters.app_template_filter() +def has_actor_type(doc): + # 
FIXME(tsileo): skipping the last one "Question", because Mastodon sends question results as an update coming from
+    # the question... Does Pleroma do that too?
+    for t in ap.ACTOR_TYPES[:-1]:
+        if has_type(doc, t.value):
+            return True
+    return False
+
+
+def _is_img(filename):
+    filename = filename.lower()
+    if (
+        filename.endswith(".png")
+        or filename.endswith(".jpg")
+        or filename.endswith(".jpeg")
+        or filename.endswith(".gif")
+        or filename.endswith(".svg")
+    ):
+        return True
+    return False
+
+
+@filters.app_template_filter()
+def not_only_imgs(attachment):
+    for a in attachment:
+        if isinstance(a, dict) and not _is_img(a["url"]):
+            return True
+        if isinstance(a, str) and not _is_img(a):
+            return True
+    return False
+
+
+@filters.app_template_filter()
+def is_img(filename):
+    return _is_img(filename)
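Because the new `api` blueprint is registered with `url_prefix="/api"`, the endpoints moved out of `app.py` keep their old URLs (`/api/note/delete`, `/api/boost`, and so on). A quick client-side sanity check, sketched with `requests`; the domain, token and note id are placeholders, and auth follows `_api_required()` (a Bearer token signed by the server-side `JWT` serializer, or an IndieAuth `access_token` form field):

```python
# Sketch only: placeholder domain/token/note id, not values from this repo.
import requests

resp = requests.post(
    "https://yourdomain.tld/api/boost",
    headers={"Authorization": "Bearer <token>"},
    data={"id": "https://remote.example/note/123"},
)
resp.raise_for_status()
# _user_api_response() answers with HTTP 201 and the id of the new activity,
# e.g. {"activity": "<id of the Announce posted to the outbox>"}.
print(resp.status_code, resp.json())
```

One detail the example depends on: `app_utils.py` imports `from config import me` but then builds `ap.Person(**ME)`, so that import presumably needs to read `from config import ME` for the module (and therefore the blueprint) to load.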