Lot of migration work (better normalization), and more visibility work

This commit is contained in:
Thomas Sileo 2019-07-14 23:57:23 +02:00
parent 152bcf2b26
commit 243e28fa75
8 changed files with 239 additions and 20 deletions

View file

@ -8,6 +8,7 @@ from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from urllib.parse import urlparse
from bson.objectid import ObjectId
from cachetools import LRUCache
@ -114,9 +115,17 @@ class MicroblogPubBackend(Backend):
def save(self, box: Box, activity: ap.BaseActivity) -> None:
"""Custom helper for saving an activity to the DB."""
is_public = True
if activity.has_type(ap.ActivityType.CREATE) and not activity.is_public():
visibility = ap.get_visibility(activity)
is_public = False
if visibility in [ap.Visibility.PUBLIC, ap.Visibility.UNLISTED]:
is_public = True
object_id = None
try:
object_id = activity.get_object_id()
except ValueError:
pass
actor_id = activity.get_actor().id
DB.activities.insert_one(
{
@ -124,7 +133,16 @@ class MicroblogPubBackend(Backend):
"activity": activity.to_dict(),
"type": _to_list(activity.type),
"remote_id": activity.id,
"meta": {"undo": False, "deleted": False, "public": is_public},
"meta": {
"undo": False,
"deleted": False,
"public": is_public,
"server": urlparse(activity.id).netloc,
"visibility": visibility.name,
"actor_id": actor_id,
"object_id": object_id,
"poll_answer": False,
},
}
)
@ -481,6 +499,15 @@ class MicroblogPubBackend(Backend):
@ensure_it_is_me
def outbox_create(self, as_actor: ap.Person, create: ap.Create) -> None:
obj = create.get_object()
# Flag the activity as a poll answer if needed
print(f"POLL ANSWER ChECK {obj.get_in_reply_to()} {obj.name} {obj.content}")
if obj.get_in_reply_to() and obj.name and not obj.content:
DB.activities.update_one(
{"remote_id": create.id}, {"$set": {"meta.poll_answer": True}}
)
self._handle_replies(as_actor, create)
@ensure_it_is_me
@ -540,7 +567,13 @@ class MicroblogPubBackend(Backend):
DB.activities.update_one(
{"remote_id": create.id},
{"$set": {"meta.answer_to": question.id, "meta.stream": False}},
{
"$set": {
"meta.answer_to": question.id,
"meta.stream": False,
"meta.poll_answer": True,
}
},
)
return None

25
app.py
View file

@ -144,14 +144,13 @@ def inject_config():
{"box": Box.OUTBOX.value, "$or": [q, {"type": "Announce", "meta.undo": False}]}
).count()
# FIXME(tsileo): rename to all_count, and remove poll answers from it
with_replies_count = DB.activities.find(
{
all_q = {
"box": Box.OUTBOX.value,
"type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
"meta.undo": False,
"meta.deleted": False,
"meta.poll_answer": False,
}
).count()
liked_count = DB.activities.count(
{
"box": Box.OUTBOX.value,
@ -181,7 +180,7 @@ def inject_config():
following_count=DB.activities.count(following_q) if logged_in else 0,
notes_count=notes_count,
liked_count=liked_count,
with_replies_count=with_replies_count if logged_in else 0,
with_replies_count=DB.activities.count(all_q) if logged_in else 0,
me=ME,
base_url=config.BASE_URL,
)
@ -916,6 +915,7 @@ def all():
"type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
"meta.deleted": False,
"meta.undo": False,
"meta.poll_answer": False,
}
outbox_data, older_than, newer_than = paginated_query(DB.activities, q)
@ -1218,7 +1218,7 @@ def outbox():
if request.method == "GET":
if not is_api_request():
abort(404)
# TODO(tsileo): returns the whole outbox if authenticated
# TODO(tsileo): returns the whole outbox if authenticated and look at OCAP support
q = {
"box": Box.OUTBOX.value,
"meta.deleted": False,
@ -1253,7 +1253,11 @@ def outbox():
@app.route("/outbox/<item_id>")
def outbox_detail(item_id):
doc = DB.activities.find_one(
{"box": Box.OUTBOX.value, "remote_id": back.activity_url(item_id)}
{
"box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id),
"meta.public": True,
}
)
if not doc:
abort(404)
@ -1269,7 +1273,11 @@ def outbox_detail(item_id):
@app.route("/outbox/<item_id>/activity")
def outbox_activity(item_id):
data = DB.activities.find_one(
{"box": Box.OUTBOX.value, "remote_id": back.activity_url(item_id)}
{
"box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id),
"meta.public": True,
}
)
if not data:
abort(404)
@ -1295,6 +1303,7 @@ def outbox_activity_replies(item_id):
"box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id),
"meta.deleted": False,
"meta.public": True,
}
)
if not data:
@ -1305,6 +1314,7 @@ def outbox_activity_replies(item_id):
q = {
"meta.deleted": False,
"meta.public": True,
"type": ActivityType.CREATE.value,
"activity.object.inReplyTo": obj.get_object().id,
}
@ -1330,6 +1340,7 @@ def outbox_activity_likes(item_id):
"box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id),
"meta.deleted": False,
"meta.public": True,
}
)
if not data:

101
migrations.py Normal file
View file

@ -0,0 +1,101 @@
"""Migrations that will be run automatically at startup."""
from typing import Any
from typing import Dict
from urllib.parse import urlparse
from little_boxes import activitypub as ap
from utils.migrations import DB
from utils.migrations import Migration
from utils.migrations import logger
from utils.migrations import perform # noqa: just here for export
class _1_MetaMigrationt(Migration):
    """Add new metadata to simplify querying.

    For every stored activity, backfill the following `meta` fields when they
    are missing (or falsy): `object_id`, `object_visibility`, `actor_id`,
    `poll_answer`, `visibility` and `server`.

    NOTE: the class name (typo included) must not be changed, `Migration`
    records the class `__qualname__` as the name of the applied migration.
    """

    @staticmethod
    def _recipients(value: Any) -> list:
        """Normalize a `to`/`cc` value (missing, single item or list) to a list."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def __guess_visibility(self, data: Dict[str, Any]) -> ap.Visibility:
        """Guess the visibility of an activity/object from its `to` and `cc` fields.

        :param data: the raw (dict) activity or object.
        :returns: `PUBLIC` if the public collection is in `to`, `UNLISTED` if it
            is in `cc`, `FOLLOWERS_ONLY` if a recipient looks like a followers
            collection, `DIRECT` otherwise.
        """
        # `to`/`cc` may be absent, null, a single string or a list, always work on lists
        to = self._recipients(data.get("to"))
        cc = self._recipients(data.get("cc"))

        if ap.AS_PUBLIC in to:
            return ap.Visibility.PUBLIC
        if ap.AS_PUBLIC in cc:
            return ap.Visibility.UNLISTED

        # Uses a bit of heuristic here, it's too expensive to fetch the actor, so assume the followers
        # collection has "/followers" in it (which is true for most software), and at worst, we will
        # classify it as "DIRECT" which behave the same as "FOLLOWERS_ONLY" (i.e. no Announce)
        if any(isinstance(item, str) and "/followers" in item for item in to + cc):
            return ap.Visibility.FOLLOWERS_ONLY

        return ap.Visibility.DIRECT

    def migrate(self) -> None:  # noqa: C901  # too complex
        """Backfill the missing `meta.*` fields of every document in `DB.activities`."""
        for data in DB.activities.find():
            logger.info(f"before={data}")
            meta = data["meta"]
            activity = data["activity"]
            obj = activity.get("object")
            set_meta: Dict[str, Any] = {}

            # Set `meta.object_id` (str)
            if not meta.get("object_id"):
                set_meta["meta.object_id"] = None
                if obj and isinstance(obj, str):
                    # The object is only referenced by its ID
                    set_meta["meta.object_id"] = obj
                elif isinstance(obj, dict) and obj.get("id"):
                    set_meta["meta.object_id"] = obj["id"]

            # Set `meta.object_visibility` (str)
            if not meta.get("object_visibility"):
                set_meta["meta.object_visibility"] = None
                object_id = meta.get("object_id") or set_meta.get("meta.object_id")
                if object_id:
                    # Prefer `meta.object` (presumably a cached copy of the object) over the
                    # embedded one. NOTE: `obj` is rebound on purpose here, the poll answer
                    # check below works on this value
                    obj = meta.get("object") or activity.get("object")
                    if isinstance(obj, dict):
                        set_meta["meta.object_visibility"] = self.__guess_visibility(
                            obj
                        ).name

            # Set `meta.actor_id` (str)
            if not meta.get("actor_id"):
                set_meta["meta.actor_id"] = None
                actor = activity.get("actor")
                if actor and isinstance(actor, str):
                    set_meta["meta.actor_id"] = actor
                elif isinstance(actor, dict) and actor.get("id"):
                    set_meta["meta.actor_id"] = actor["id"]

            # Set `meta.poll_answer` (bool)
            if not meta.get("poll_answer"):
                # A poll answer is a reply that has a name but no content
                set_meta["meta.poll_answer"] = bool(
                    isinstance(obj, dict)
                    and obj.get("name")
                    and not obj.get("content")
                    and obj.get("inReplyTo")
                )

            # Set `meta.visibility` (str)
            if not meta.get("visibility"):
                set_meta["meta.visibility"] = self.__guess_visibility(activity).name

            # Set `meta.server` (str)
            if not meta.get("server"):
                set_meta["meta.server"] = urlparse(data["remote_id"]).netloc

            logger.info(f"meta={set_meta}\n")
            # An empty `$set` is rejected by MongoDB < 5.0, skip up-to-date documents
            if set_meta:
                DB.activities.update_one({"_id": data["_id"]}, {"$set": set_meta})

1
run.sh
View file

@ -1,3 +1,4 @@
#!/bin/bash
python -c "import logging; logging.basicConfig(level=logging.DEBUG); import migrations; migrations.perform()"
python -c "import config; config.create_indexes()"
gunicorn -t 600 -w 5 -b 0.0.0.0:5005 --log-level debug app:app

View file

@ -270,6 +270,15 @@ a:hover {
float: left;
border-radius:2px;
}
.bar-icon {
background: $background-color;
padding: 5px;
color: $color-light;
margin-right: 10px;
float: left;
border: none;
}
.bar-item:hover {
background: $primary-color;
color: $background-color;

View file

@ -6,6 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>{% block title %}{{ config.NAME }}{% endblock %}'s microblog</title>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="stylesheet" href="/static/css/feathericon.min.css">
<link rel="authorization_endpoint" href="{{ config.ID }}/indieauth">
<link rel="token_endpoint" href="{{ config.ID }}/token">
{% if not request.args.get("older_than") and not request.args.get("previous_than") %}<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">{% endif %}

View file

@ -56,6 +56,7 @@
{% if not perma %}
<span style="float:right;width: 20%;text-align: right;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;display: block;">
<i class="fe fe-unlock" title="Public" style="font-size:1em;padding-right:5px;"></i>
<a rel="noopener" class="u-url u-uid note-permalink l" href="{{ obj | url_or_id | get_url }}">
<time class="dt-published" title="{{ obj.published }}" datetime="{{ obj.published }}">{{ obj.published | format_timeago }}</time></a>
</span>
@ -164,10 +165,14 @@
{% if perma %}
<span class="perma-item" style="float:left;padding:5px;">{{ obj.published | format_time }}</span>
{% if not (obj.id | is_from_outbox) %}
<a class ="bar-item" href="{{ obj | url_or_id | get_url }}">permalink</a>
<a class="bar-icon" href="{{ obj | url_or_id | get_url }}">
<i class="fe fe-link-external"></i>
</a>
{% endif %}
{% else %}
<a class ="bar-item" href="{{ obj | url_or_id | get_url }}">permalink</a>
<a class="bar-icon" style="font-size:1.5em;" href="{{ obj | url_or_id | get_url }}">
<i class="fe fe-link-external"></i>
</a>
{% endif %}
{% if meta.count_reply and obj.id | is_from_outbox %}<a class ="bar-item" href="{{ obj.url | get_url }}"><strong>{{ meta.count_reply }}</strong> replies</a>
@ -192,7 +197,9 @@
<input type="hidden" name="redirect" value="{{ redir }}">
<input type="hidden" name="id" value="{{ obj.id }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="bar-item">boost</button>
<button type="submit" class="bar-icon" style="font-size:1.5em;">
<i class="fe fe-share" title="Public"></i>
</button>
</form>
{% endif %}

56
utils/migrations.py Normal file
View file

@ -0,0 +1,56 @@
"""Automatic migration tools for the da:ta stored in MongoDB."""
import logging
from abc import ABC
from abc import abstractmethod
from typing import List
from typing import Type
from config import DB
# Module-level logger, shared by the migration runner below
logger = logging.getLogger(__name__)
# Used to keep track of all the defined migrations (every `Migration` subclass
# appends itself here from `Migration.__init_subclass__`)
_MIGRATIONS: List[Type["Migration"]] = []
def perform() -> None:
    """Instantiate and run every registered migration, in registration order."""
    for migration_cls in _MIGRATIONS:
        pending = migration_cls()
        pending.perform()
class Migration(ABC):
    """Base class for the migrations.

    A subclass only has to implement `migrate`: it gets registered as soon as
    it is defined, and `perform` runs it unless a document carrying its name
    is already stored in the `migrations` collection.
    """

    def __init__(self) -> None:
        # Applied migrations are tracked by the qualified name of their class
        self._col = DB.migrations
        self.name = type(self).__qualname__

    def __init_subclass__(cls, **kwargs):
        """Register the new subclass in the module-level list of migrations."""
        super().__init_subclass__(**kwargs)
        _MIGRATIONS.append(cls)

    def _apply(self) -> None:
        """Record this migration as applied."""
        marker = {"name": self.name}
        self._col.insert_one(marker)

    def _reset(self) -> None:
        """Remove the "applied" record of this migration."""
        marker = {"name": self.name}
        self._col.delete_one(marker)

    def _is_applied(self) -> bool:
        """Tell whether a record exists for this migration."""
        record = self._col.find_one({"name": self.name})
        return bool(record)

    @abstractmethod
    def migrate(self) -> None:
        """Expected to be implemented by actual migrations."""

    def perform(self) -> None:
        """Run `migrate` and record it, or do nothing if it was already applied."""
        if not self._is_applied():
            logger.info(f"Performing migration {self.name}...")
            self.migrate()
            # Only recorded once `migrate` returned without raising
            self._apply()
            logger.info("Done")
        else:
            logger.info(f"Skipping migration {self.name} (already applied)")