Lots of migration work (better normalization) and more visibility work

This commit is contained in:
Thomas Sileo 2019-07-14 23:57:23 +02:00
parent 152bcf2b26
commit 243e28fa75
8 changed files with 239 additions and 20 deletions

View file

@ -8,6 +8,7 @@ from typing import Any
from typing import Dict from typing import Dict
from typing import List from typing import List
from typing import Optional from typing import Optional
from urllib.parse import urlparse
from bson.objectid import ObjectId from bson.objectid import ObjectId
from cachetools import LRUCache from cachetools import LRUCache
@ -114,9 +115,17 @@ class MicroblogPubBackend(Backend):
def save(self, box: Box, activity: ap.BaseActivity) -> None: def save(self, box: Box, activity: ap.BaseActivity) -> None:
"""Custom helper for saving an activity to the DB.""" """Custom helper for saving an activity to the DB."""
is_public = True visibility = ap.get_visibility(activity)
if activity.has_type(ap.ActivityType.CREATE) and not activity.is_public():
is_public = False is_public = False
if visibility in [ap.Visibility.PUBLIC, ap.Visibility.UNLISTED]:
is_public = True
object_id = None
try:
object_id = activity.get_object_id()
except ValueError:
pass
actor_id = activity.get_actor().id
DB.activities.insert_one( DB.activities.insert_one(
{ {
@ -124,7 +133,16 @@ class MicroblogPubBackend(Backend):
"activity": activity.to_dict(), "activity": activity.to_dict(),
"type": _to_list(activity.type), "type": _to_list(activity.type),
"remote_id": activity.id, "remote_id": activity.id,
"meta": {"undo": False, "deleted": False, "public": is_public}, "meta": {
"undo": False,
"deleted": False,
"public": is_public,
"server": urlparse(activity.id).netloc,
"visibility": visibility.name,
"actor_id": actor_id,
"object_id": object_id,
"poll_answer": False,
},
} }
) )
@ -481,6 +499,15 @@ class MicroblogPubBackend(Backend):
@ensure_it_is_me @ensure_it_is_me
def outbox_create(self, as_actor: ap.Person, create: ap.Create) -> None: def outbox_create(self, as_actor: ap.Person, create: ap.Create) -> None:
obj = create.get_object()
# Flag the activity as a poll answer if needed
print(f"POLL ANSWER ChECK {obj.get_in_reply_to()} {obj.name} {obj.content}")
if obj.get_in_reply_to() and obj.name and not obj.content:
DB.activities.update_one(
{"remote_id": create.id}, {"$set": {"meta.poll_answer": True}}
)
self._handle_replies(as_actor, create) self._handle_replies(as_actor, create)
@ensure_it_is_me @ensure_it_is_me
@ -540,7 +567,13 @@ class MicroblogPubBackend(Backend):
DB.activities.update_one( DB.activities.update_one(
{"remote_id": create.id}, {"remote_id": create.id},
{"$set": {"meta.answer_to": question.id, "meta.stream": False}}, {
"$set": {
"meta.answer_to": question.id,
"meta.stream": False,
"meta.poll_answer": True,
}
},
) )
return None return None

25
app.py
View file

@ -144,14 +144,13 @@ def inject_config():
{"box": Box.OUTBOX.value, "$or": [q, {"type": "Announce", "meta.undo": False}]} {"box": Box.OUTBOX.value, "$or": [q, {"type": "Announce", "meta.undo": False}]}
).count() ).count()
# FIXME(tsileo): rename to all_count, and remove poll answers from it # FIXME(tsileo): rename to all_count, and remove poll answers from it
with_replies_count = DB.activities.find( all_q = {
{
"box": Box.OUTBOX.value, "box": Box.OUTBOX.value,
"type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]}, "type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
"meta.undo": False, "meta.undo": False,
"meta.deleted": False, "meta.deleted": False,
"meta.poll_answer": False,
} }
).count()
liked_count = DB.activities.count( liked_count = DB.activities.count(
{ {
"box": Box.OUTBOX.value, "box": Box.OUTBOX.value,
@ -181,7 +180,7 @@ def inject_config():
following_count=DB.activities.count(following_q) if logged_in else 0, following_count=DB.activities.count(following_q) if logged_in else 0,
notes_count=notes_count, notes_count=notes_count,
liked_count=liked_count, liked_count=liked_count,
with_replies_count=with_replies_count if logged_in else 0, with_replies_count=DB.activities.count(all_q) if logged_in else 0,
me=ME, me=ME,
base_url=config.BASE_URL, base_url=config.BASE_URL,
) )
@ -916,6 +915,7 @@ def all():
"type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]}, "type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
"meta.deleted": False, "meta.deleted": False,
"meta.undo": False, "meta.undo": False,
"meta.poll_answer": False,
} }
outbox_data, older_than, newer_than = paginated_query(DB.activities, q) outbox_data, older_than, newer_than = paginated_query(DB.activities, q)
@ -1218,7 +1218,7 @@ def outbox():
if request.method == "GET": if request.method == "GET":
if not is_api_request(): if not is_api_request():
abort(404) abort(404)
# TODO(tsileo): returns the whole outbox if authenticated # TODO(tsileo): returns the whole outbox if authenticated and look at OCAP support
q = { q = {
"box": Box.OUTBOX.value, "box": Box.OUTBOX.value,
"meta.deleted": False, "meta.deleted": False,
@ -1253,7 +1253,11 @@ def outbox():
@app.route("/outbox/<item_id>") @app.route("/outbox/<item_id>")
def outbox_detail(item_id): def outbox_detail(item_id):
doc = DB.activities.find_one( doc = DB.activities.find_one(
{"box": Box.OUTBOX.value, "remote_id": back.activity_url(item_id)} {
"box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id),
"meta.public": True,
}
) )
if not doc: if not doc:
abort(404) abort(404)
@ -1269,7 +1273,11 @@ def outbox_detail(item_id):
@app.route("/outbox/<item_id>/activity") @app.route("/outbox/<item_id>/activity")
def outbox_activity(item_id): def outbox_activity(item_id):
data = DB.activities.find_one( data = DB.activities.find_one(
{"box": Box.OUTBOX.value, "remote_id": back.activity_url(item_id)} {
"box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id),
"meta.public": True,
}
) )
if not data: if not data:
abort(404) abort(404)
@ -1295,6 +1303,7 @@ def outbox_activity_replies(item_id):
"box": Box.OUTBOX.value, "box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id), "remote_id": back.activity_url(item_id),
"meta.deleted": False, "meta.deleted": False,
"meta.public": True,
} }
) )
if not data: if not data:
@ -1305,6 +1314,7 @@ def outbox_activity_replies(item_id):
q = { q = {
"meta.deleted": False, "meta.deleted": False,
"meta.public": True,
"type": ActivityType.CREATE.value, "type": ActivityType.CREATE.value,
"activity.object.inReplyTo": obj.get_object().id, "activity.object.inReplyTo": obj.get_object().id,
} }
@ -1330,6 +1340,7 @@ def outbox_activity_likes(item_id):
"box": Box.OUTBOX.value, "box": Box.OUTBOX.value,
"remote_id": back.activity_url(item_id), "remote_id": back.activity_url(item_id),
"meta.deleted": False, "meta.deleted": False,
"meta.public": True,
} }
) )
if not data: if not data:

101
migrations.py Normal file
View file

@ -0,0 +1,101 @@
"""Migrations that will be run automatically at startup."""
from typing import Any
from typing import Dict
from urllib.parse import urlparse
from little_boxes import activitypub as ap
from utils.migrations import DB
from utils.migrations import Migration
from utils.migrations import logger
from utils.migrations import perform # noqa: just here for export
class _1_MetaMigrationt(Migration):
    """Add new metadata to simplify querying.

    For every stored activity, back-fill the ``meta`` keys ``object_id``,
    ``object_visibility``, ``actor_id``, ``poll_answer``, ``visibility`` and
    ``server``. Only keys that are currently missing (or falsy) are written,
    so re-running the migration over already-migrated documents is harmless.

    NOTE: the class name (typo included) is kept on purpose: the ``Migration``
    base class records ``__qualname__`` as the "already applied" marker, so
    renaming the class would make the migration run again.
    """

    @staticmethod
    def _as_list(value: Any) -> list:
        """Normalize an addressing field (``to``/``cc``) to a list.

        The field may be absent/``None``, a single value, or a list of values;
        without normalization a single string would be scanned character by
        character and ``None`` would raise a ``TypeError``.
        """
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def __guess_visibility(self, data: Dict[str, Any]) -> ap.Visibility:
        """Guess the visibility of an activity/object from its ``to``/``cc`` fields.

        Args:
            data: the raw (dict) activity or object.

        Returns:
            ``PUBLIC`` when the public collection is in ``to``, ``UNLISTED`` when
            it is only in ``cc``, ``FOLLOWERS_ONLY`` when a recipient looks like a
            followers collection, ``DIRECT`` otherwise.
        """
        to = self._as_list(data.get("to"))
        cc = self._as_list(data.get("cc"))

        if ap.AS_PUBLIC in to:
            return ap.Visibility.PUBLIC
        if ap.AS_PUBLIC in cc:
            return ap.Visibility.UNLISTED

        # Uses a bit of heuristic here: it's too expensive to fetch the actor, so
        # assume the followers collection URL has "/followers" in it (which is
        # true for most software). At worst, we will classify it as "DIRECT",
        # which behaves the same as "FOLLOWERS_ONLY" (i.e. no Announce).
        if any(isinstance(item, str) and "/followers" in item for item in to + cc):
            return ap.Visibility.FOLLOWERS_ONLY
        return ap.Visibility.DIRECT

    def migrate(self) -> None:  # noqa: C901 # too complex
        """Walk every document of ``DB.activities`` and ``$set`` the missing meta keys."""
        for data in DB.activities.find():
            logger.info("before=%s", data)
            meta = data["meta"]
            activity = data["activity"]
            obj = activity.get("object")
            set_meta: Dict[str, Any] = {}

            # Set `meta.object_id` (str)
            if not meta.get("object_id"):
                set_meta["meta.object_id"] = None
                if isinstance(obj, str) and obj:
                    # The object is referenced by its ID
                    set_meta["meta.object_id"] = obj
                elif isinstance(obj, dict) and obj.get("id"):
                    # The object is embedded in the activity
                    set_meta["meta.object_id"] = obj["id"]

            # Set `meta.object_visibility` (str)
            if not meta.get("object_visibility"):
                set_meta["meta.object_visibility"] = None
                object_id = meta.get("object_id") or set_meta.get("meta.object_id")
                if object_id:
                    # Prefer the object stored in `meta.object` over the one in
                    # the activity itself.
                    # NOTE(review): `obj` stays rebound for the poll answer
                    # check below, as in the original code -- confirm intended.
                    obj = meta.get("object") or activity.get("object")
                    if isinstance(obj, dict):
                        set_meta["meta.object_visibility"] = self.__guess_visibility(
                            obj
                        ).name

            # Set `meta.actor_id` (str)
            if not meta.get("actor_id"):
                set_meta["meta.actor_id"] = None
                actor = activity.get("actor")
                if isinstance(actor, str) and actor:
                    set_meta["meta.actor_id"] = actor
                elif isinstance(actor, dict) and actor.get("id"):
                    set_meta["meta.actor_id"] = actor["id"]

            # Set `meta.poll_answer` (bool): a poll answer is a reply that has a
            # name but no content.
            if not meta.get("poll_answer"):
                set_meta["meta.poll_answer"] = bool(
                    isinstance(obj, dict)
                    and obj.get("name")
                    and not obj.get("content")
                    and obj.get("inReplyTo")
                )

            # Set `meta.visibility` (str)
            if not meta.get("visibility"):
                set_meta["meta.visibility"] = self.__guess_visibility(activity).name

            # Set `meta.server` (str): the network location of the activity ID
            if not meta.get("server"):
                set_meta["meta.server"] = urlparse(data["remote_id"]).netloc

            logger.info("meta=%s\n", set_meta)
            # An empty `$set` is rejected by MongoDB < 5.0; it happens when every
            # key is already populated (e.g. re-run after a partial migration).
            if set_meta:
                DB.activities.update_one({"_id": data["_id"]}, {"$set": set_meta})

1
run.sh
View file

@ -1,3 +1,4 @@
#!/bin/bash #!/bin/bash
python -c "import logging; logging.basicConfig(level=logging.DEBUG); import migrations; migrations.perform()"
python -c "import config; config.create_indexes()" python -c "import config; config.create_indexes()"
gunicorn -t 600 -w 5 -b 0.0.0.0:5005 --log-level debug app:app gunicorn -t 600 -w 5 -b 0.0.0.0:5005 --log-level debug app:app

View file

@ -270,6 +270,15 @@ a:hover {
float: left; float: left;
border-radius:2px; border-radius:2px;
} }
.bar-icon {
background: $background-color;
padding: 5px;
color: $color-light;
margin-right: 10px;
float: left;
border: none;
}
.bar-item:hover { .bar-item:hover {
background: $primary-color; background: $primary-color;
color: $background-color; color: $background-color;

View file

@ -6,6 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>{% block title %}{{ config.NAME }}{% endblock %}'s microblog</title> <title>{% block title %}{{ config.NAME }}{% endblock %}'s microblog</title>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous"> <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="stylesheet" href="/static/css/feathericon.min.css">
<link rel="authorization_endpoint" href="{{ config.ID }}/indieauth"> <link rel="authorization_endpoint" href="{{ config.ID }}/indieauth">
<link rel="token_endpoint" href="{{ config.ID }}/token"> <link rel="token_endpoint" href="{{ config.ID }}/token">
{% if not request.args.get("older_than") and not request.args.get("previous_than") %}<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">{% endif %} {% if not request.args.get("older_than") and not request.args.get("previous_than") %}<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">{% endif %}

View file

@ -56,6 +56,7 @@
{% if not perma %} {% if not perma %}
<span style="float:right;width: 20%;text-align: right;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;display: block;"> <span style="float:right;width: 20%;text-align: right;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;display: block;">
<i class="fe fe-unlock" title="Public" style="font-size:1em;padding-right:5px;"></i>
<a rel="noopener" class="u-url u-uid note-permalink l" href="{{ obj | url_or_id | get_url }}"> <a rel="noopener" class="u-url u-uid note-permalink l" href="{{ obj | url_or_id | get_url }}">
<time class="dt-published" title="{{ obj.published }}" datetime="{{ obj.published }}">{{ obj.published | format_timeago }}</time></a> <time class="dt-published" title="{{ obj.published }}" datetime="{{ obj.published }}">{{ obj.published | format_timeago }}</time></a>
</span> </span>
@ -164,10 +165,14 @@
{% if perma %} {% if perma %}
<span class="perma-item" style="float:left;padding:5px;">{{ obj.published | format_time }}</span> <span class="perma-item" style="float:left;padding:5px;">{{ obj.published | format_time }}</span>
{% if not (obj.id | is_from_outbox) %} {% if not (obj.id | is_from_outbox) %}
<a class ="bar-item" href="{{ obj | url_or_id | get_url }}">permalink</a> <a class="bar-icon" href="{{ obj | url_or_id | get_url }}">
<i class="fe fe-link-external"></i>
</a>
{% endif %} {% endif %}
{% else %} {% else %}
<a class ="bar-item" href="{{ obj | url_or_id | get_url }}">permalink</a> <a class="bar-icon" style="font-size:1.5em;" href="{{ obj | url_or_id | get_url }}">
<i class="fe fe-link-external"></i>
</a>
{% endif %} {% endif %}
{% if meta.count_reply and obj.id | is_from_outbox %}<a class ="bar-item" href="{{ obj.url | get_url }}"><strong>{{ meta.count_reply }}</strong> replies</a> {% if meta.count_reply and obj.id | is_from_outbox %}<a class ="bar-item" href="{{ obj.url | get_url }}"><strong>{{ meta.count_reply }}</strong> replies</a>
@ -192,7 +197,9 @@
<input type="hidden" name="redirect" value="{{ redir }}"> <input type="hidden" name="redirect" value="{{ redir }}">
<input type="hidden" name="id" value="{{ obj.id }}"> <input type="hidden" name="id" value="{{ obj.id }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"> <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="bar-item">boost</button> <button type="submit" class="bar-icon" style="font-size:1.5em;">
<i class="fe fe-share" title="Public"></i>
</button>
</form> </form>
{% endif %} {% endif %}

56
utils/migrations.py Normal file
View file

@ -0,0 +1,56 @@
"""Automatic migration tools for the data stored in MongoDB."""
import logging
from abc import ABC
from abc import abstractmethod
from typing import List
from typing import Type
from config import DB
# Module-level logger; the `migrations` module imports it from here as well.
logger = logging.getLogger(__name__)
# Used to keep track of all the defined migrations: `Migration.__init_subclass__`
# appends each subclass as it gets defined, and `perform()` instantiates and runs
# them in that (definition) order.
_MIGRATIONS: List[Type["Migration"]] = []
def perform() -> None:
    """Instantiate and run every registered migration, in registration order."""
    for migration_cls in _MIGRATIONS:
        runner = migration_cls()
        runner.perform()
class Migration(ABC):
    """Base class every migration derives from.

    Defining a subclass is enough to register it: ``__init_subclass__`` appends
    the new class to the module-level ``_MIGRATIONS`` list. A migration is
    identified by its class ``__qualname__``, which is stored in the
    ``DB.migrations`` collection once ``migrate`` has completed.
    """

    def __init__(self) -> None:
        # Collection holding one `{"name": ...}` document per applied migration
        self._col = DB.migrations
        # The qualified class name doubles as the migration identifier
        self.name = type(self).__qualname__

    def __init_subclass__(cls, **kwargs):
        """Register each newly defined subclass in ``_MIGRATIONS``."""
        super().__init_subclass__(**kwargs)
        _MIGRATIONS.append(cls)

    def _apply(self) -> None:
        """Record this migration as applied."""
        marker = {"name": self.name}
        self._col.insert_one(marker)

    def _reset(self) -> None:
        """Remove the "applied" record of this migration."""
        marker = {"name": self.name}
        self._col.delete_one(marker)

    def _is_applied(self) -> bool:
        """Return whether an "applied" record exists for this migration."""
        record = self._col.find_one({"name": self.name})
        return bool(record)

    @abstractmethod
    def migrate(self) -> None:
        """Do the actual work; must be provided by concrete migrations."""

    def perform(self) -> None:
        """Run ``migrate`` and record it, unless it has already been applied."""
        if not self._is_applied():
            logger.info(f"Performing migration {self.name}...")
            self.migrate()
            # Only reached when `migrate` did not raise
            self._apply()
            logger.info("Done")
        else:
            logger.info(f"Skipping migration {self.name} (already applied)")