Improve actor caching

Thomas Sileo 2019-08-11 11:32:52 +02:00
parent b6751f511a
commit 9aa1f67e28
4 changed files with 56 additions and 44 deletions
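The heart of the change is the new `_update_cached_actor` helper added in the first file below: it refreshes every cached copy of an actor in one pass, guarded by a hash comparison so only stale copies are rewritten. A minimal sketch of the underlying update, assuming a plain pymongo collection and literal `meta.*` field names in place of the project's `update_many_activities`/`flag`/`upsert` wrappers and `MetaKey` constants (the helper name and field names here are illustrative, not the project's):

    # Sketch only: the real helper goes through update_many_activities()/flag()/upsert().
    # `activities` is assumed to be a pymongo Collection; `actor` is a freshly fetched actor.
    def refresh_cached_actor(activities, actor, actor_hash):
        # Rewrite the cached actor on activities authored by this actor,
        # skipping documents whose cached hash already matches.
        activities.update_many(
            {"meta.actor_id": actor.id, "meta.actor_hash": {"$ne": actor_hash}},
            {"$set": {"meta.actor": actor.to_dict(embed=True), "meta.actor_hash": actor_hash}},
        )
        # Same pass for activities where this actor authored the activity's object.
        activities.update_many(
            {"meta.object_actor_id": actor.id, "meta.object_actor_hash": {"$ne": actor_hash}},
            {
                "$set": {
                    "meta.object_actor": actor.to_dict(embed=True),
                    "meta.object_actor_hash": actor_hash,
                }
            },
        )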

View file

@@ -141,19 +141,28 @@ def task_cache_object() -> _Response:
         activity = ap.fetch_remote_activity(iri)
         app.logger.info(f"activity={activity!r}")
         obj = activity.get_object()
-        obj_actor = obj.get_actor()
-        obj_actor_hash = _actor_hash(obj_actor)
+        # Refetch the object actor (without cache)
+        with no_cache():
+            obj_actor = ap.fetch_remote_activity(obj.get_actor().id)
         cache = {MetaKey.OBJECT: obj.to_dict(embed=True)}
         if activity.get_actor().id != obj_actor.id:
+            # Cache the object actor
+            obj_actor_hash = _actor_hash(obj_actor)
             cache[MetaKey.OBJECT_ACTOR] = obj_actor.to_dict(embed=True)
             cache[MetaKey.OBJECT_ACTOR_ID] = obj_actor.id
             cache[MetaKey.OBJECT_ACTOR_HASH] = obj_actor_hash
-            # FIXME(tsileo): set OBJECT_ACTOR_HASH (like in "cache actor" and do an update_many even for ACTOR (not only
-            # OBJECT_ACTOR) ; a migration for OBJECT_ACTOR_ID/OBJECT_ACTOR_HASH needed?
+            # Cache the actor icon if any
+            _cache_actor_icon(obj_actor)
+            # Update the actor cache for the other activities
+            _update_cached_actor(obj_actor)
         update_one_activity(by_remote_id(activity.id), upsert(cache))
     except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
         DB.activities.update_one({"remote_id": iri}, {"$set": {"meta.deleted": True}})
         app.logger.exception(f"flagging activity {iri} as deleted, no object caching")
@@ -243,6 +252,39 @@ def task_cache_attachments() -> _Response:
     return ""


+def _update_cached_actor(actor: ap.BaseActivity) -> None:
+    actor_hash = _actor_hash(actor)
+    update_many_activities(
+        {
+            **flag(MetaKey.ACTOR_ID, actor.id),
+            **flag(MetaKey.ACTOR_HASH, {"$ne": actor_hash}),
+        },
+        upsert(
+            {MetaKey.ACTOR: actor.to_dict(embed=True), MetaKey.ACTOR_HASH: actor_hash}
+        ),
+    )
+    update_many_activities(
+        {
+            **flag(MetaKey.OBJECT_ACTOR_ID, actor.id),
+            **flag(MetaKey.OBJECT_ACTOR_HASH, {"$ne": actor_hash}),
+        },
+        upsert(
+            {
+                MetaKey.OBJECT_ACTOR: actor.to_dict(embed=True),
+                MetaKey.OBJECT_ACTOR_HASH: actor_hash,
+            }
+        ),
+    )
+
+
+def _cache_actor_icon(actor: ap.BaseActivity) -> None:
+    if actor.icon:
+        if isinstance(actor.icon, dict) and "url" in actor.icon:
+            config.MEDIA_CACHE.cache_actor_icon(actor.icon["url"])
+        else:
+            app.logger.warning(f"failed to parse icon {actor.icon} for {actor!r}")
+
+
 @blueprint.route("/task/cache_actor", methods=["POST"])
 def task_cache_actor() -> _Response:
     task = p.parse(flask.request)
@@ -256,17 +298,12 @@ def task_cache_actor() -> _Response:
         with no_cache():
             actor = ap.fetch_remote_activity(activity.get_actor().id)
-        actor_hash = _actor_hash(actor)
         # Fetch the Open Grah metadata if it's a `Create`
         if activity.has_type(ap.ActivityType.CREATE):
             Tasks.fetch_og_meta(iri)
-        if actor.icon:
-            if isinstance(actor.icon, dict) and "url" in actor.icon:
-                config.MEDIA_CACHE.cache_actor_icon(actor.icon["url"])
-            else:
-                app.logger.warning(f"failed to parse icon {actor.icon} for {iri}")
+        # Cache the actor icon if any
+        _cache_actor_icon(actor)
         if activity.has_type(ap.ActivityType.FOLLOW):
             if actor.id == config.ID:
@@ -277,18 +314,7 @@ def task_cache_actor() -> _Response:
                 )
         # Cache the actor info
-        update_many_activities(
-            {
-                **flag(MetaKey.ACTOR_ID, actor.id),
-                **flag(MetaKey.ACTOR_HASH, {"$ne": actor_hash}),
-            },
-            upsert(
-                {
-                    MetaKey.ACTOR: actor.to_dict(embed=True),
-                    MetaKey.ACTOR_HASH: actor_hash,
-                }
-            ),
-        )
+        _update_cached_actor(actor)
         # TODO(tsileo): Also update following (it's in the object)
         # DB.activities.update_many(

View file

@@ -121,15 +121,9 @@ def _announce_process_inbox(announce: ap.Announce, new_meta: _NewMeta) -> None:
     if obj.has_type(ap.ActivityType.QUESTION):
         Tasks.fetch_remote_question(obj)
-    update_one_activity(
-        by_remote_id(announce.id),
-        upsert(
-            {
-                MetaKey.OBJECT: obj.to_dict(embed=True),
-                MetaKey.OBJECT_ACTOR: obj.get_actor().to_dict(embed=True),
-            }
-        ),
-    )
+    # Cache the announced object
+    Tasks.cache_object(announce.id)
     update_one_activity(
         {**by_type(ap.ActivityType.CREATE), **by_object_id(obj.id)},
         inc(MetaKey.COUNT_BOOST, 1),
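Here and in the outbox changes below, the inline `update_one_activity(..., upsert({MetaKey.OBJECT: ..., MetaKey.OBJECT_ACTOR: ...}))` block is replaced by a single `Tasks.cache_object(...)` call, deferring the caching (including the uncached actor refetch, hashing, and icon caching from the first file) to the `/task/cache_object` task; the duplicate call in the notification flag handler below is removed. A rough sketch of the resulting inbox handler shape, with `Tasks.cache_object` treated as an assumed asynchronous trigger of that endpoint and type annotations/imports omitted:

    # Sketch only: post-change shape of the announce inbox handler.
    # Tasks.cache_object() is assumed to schedule an async hit on /task/cache_object.
    def _announce_process_inbox(announce, new_meta):
        obj = announce.get_object()
        if obj.has_type(ap.ActivityType.QUESTION):
            Tasks.fetch_remote_question(obj)
        # Object and object-actor caching now happens in the task, not inline
        Tasks.cache_object(announce.id)
        # The boost counter is still bumped inline on the matching Create
        update_one_activity(
            {**by_type(ap.ActivityType.CREATE), **by_object_id(obj.id)},
            inc(MetaKey.COUNT_BOOST, 1),
        )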

View file

@@ -131,8 +131,6 @@ def _announce_set_inbox_flags(activity: ap.Announce, new_meta: _NewMeta) -> None:
     # Also set the "keep mark" for the GC (as we want to keep it forever)
     _set_flag(new_meta, MetaKey.GC_KEEP)
-    # Cache the object in all case (for display on the notifcation page **and** the stream page)
-    Tasks.cache_object(activity.id)
     # Display it in the stream
     _set_flag(new_meta, MetaKey.STREAM)

View file

@@ -11,7 +11,6 @@ from core.db import update_many_activities
 from core.db import update_one_activity
 from core.meta import MetaKey
 from core.meta import by_object_id
-from core.meta import by_remote_id
 from core.meta import by_type
 from core.meta import inc
 from core.meta import upsert
@@ -99,15 +98,7 @@ def _announce_process_outbox(announce: ap.Announce, new_meta: _NewMeta) -> None:
     if obj.has_type(ap.ActivityType.QUESTION):
         Tasks.fetch_remote_question(obj)
-    update_one_activity(
-        by_remote_id(announce.id),
-        upsert(
-            {
-                MetaKey.OBJECT: obj.to_dict(embed=True),
-                MetaKey.OBJECT_ACTOR: obj.get_actor().to_dict(embed=True),
-            }
-        ),
-    )
+    Tasks.cache_object(announce.id)
     update_one_activity(
         {**by_object_id(obj.id), **by_type(ap.ActivityType.CREATE)},
@@ -123,6 +114,9 @@ def _like_process_outbox(like: ap.Like, new_meta: _NewMeta) -> None:
     if obj.has_type(ap.ActivityType.QUESTION):
         Tasks.fetch_remote_question(obj)
+    # Cache the object for display on the "Liked" public page
+    Tasks.cache_object(like.id)
     update_one_activity(
         {**by_object_id(obj.id), **by_type(ap.ActivityType.CREATE)},
         {**inc(MetaKey.COUNT_LIKE, 1), **upsert({MetaKey.LIKED: like.id})},