Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Avoid unnecessary work in the spaces summary. #10085

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10085.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve the performance of the spaces summary endpoint by only recursing into spaces.
5 changes: 5 additions & 0 deletions synapse/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ class EventContentFields:
MSC1772_ROOM_TYPE = "org.matrix.msc1772.type"


class RoomTypes:
clokep marked this conversation as resolved.
Show resolved Hide resolved
SPACE = "m.space"
MSC1772_SPACE = "org.matrix.msc1772.space"
clokep marked this conversation as resolved.
Show resolved Hide resolved


class RoomEncryptionAlgorithms:
MEGOLM_V1_AES_SHA2 = "m.megolm.v1.aes-sha2"
DEFAULT = MEGOLM_V1_AES_SHA2
Expand Down
111 changes: 60 additions & 51 deletions synapse/handlers/space_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import logging
import re
from collections import deque
from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Set, Tuple

import attr

Expand All @@ -25,6 +25,7 @@
EventTypes,
HistoryVisibility,
Membership,
RoomTypes,
)
from synapse.events import EventBase
from synapse.events.utils import format_event_for_client_v2
Expand Down Expand Up @@ -88,11 +89,10 @@ async def get_space_summary(
room_queue = deque((_RoomQueueEntry(room_id, ()),))

# rooms we have already processed
processed_rooms = set() # type: Set[str]
processed_rooms: Dict[str, bool] = {}
clokep marked this conversation as resolved.
Show resolved Hide resolved

# events we have already processed. We don't necessarily have their event ids,
# so instead we key on (room id, state key)
processed_events = set() # type: Set[Tuple[str, str]]
# events which are pending knowing if a room is accessible.
clokep marked this conversation as resolved.
Show resolved Hide resolved
pending_events: Dict[str, List[JsonDict]] = {}

rooms_result = [] # type: List[JsonDict]
events_result = [] # type: List[JsonDict]
Expand Down Expand Up @@ -126,21 +126,27 @@ async def get_space_summary(

if room:
rooms_result.append(room)

# Mark whether this room was accessible or not.
processed_rooms[room_id] = bool(room)
clokep marked this conversation as resolved.
Show resolved Hide resolved
else:
fed_rooms, fed_events = await self._summarize_remote_room(
fed_rooms, events = await self._summarize_remote_room(
queue_entry,
suggested_only,
max_children,
exclude_rooms=processed_rooms,
)

# The queried room may or may not have been returned, but don't process
# it again, anyway. (This may be overridden below if it is accessible.)
processed_rooms[room_id] = False

# The results over federation might include rooms that the we,
# as the requesting server, are allowed to see, but the requesting
# user is not permitted see.
#
# Filter the returned results to only what is accessible to the user.
room_ids = set()
events = []
for room in fed_rooms:
fed_room_id = room.get("room_id")
if not fed_room_id or not isinstance(fed_room_id, str):
Expand Down Expand Up @@ -184,42 +190,42 @@ async def get_space_summary(

# All rooms returned don't need visiting again (even if the user
# didn't have access to them).
processed_rooms.add(fed_room_id)

for event in fed_events:
if event.get("room_id") in room_ids:
events.append(event)
processed_rooms[fed_room_id] = include_room

logger.debug(
"Query of %s returned rooms %s, events %s",
room_id,
[room.get("room_id") for room in fed_rooms],
["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in fed_events],
["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events],
)

# the room we queried may or may not have been returned, but don't process
# it again, anyway.
processed_rooms.add(room_id)
# There might be events which point to this room which are waiting
# to see if it is accessible.
room_pending_events = pending_events.pop(room_id, ())
if not processed_rooms[room_id]:
room_pending_events = ()
clokep marked this conversation as resolved.
Show resolved Hide resolved

# Return the events which reference rooms that were found to be
# accessible. Otherwise, queue them until we process those rooms.
clokep marked this conversation as resolved.
Show resolved Hide resolved
for ev in itertools.chain(events, room_pending_events):
parent_room_id = ev["room_id"]
child_room_id = ev["state_key"]

# XXX: is it ok that we blindly iterate through any events returned by
# a remote server, whether or not they actually link to any rooms in our
# tree?
for ev in events:
# remote servers might return events we have already processed
# (eg, Dendrite returns inward pointers as well as outward ones), so
# we need to filter them out, to avoid returning duplicate links to the
# client.
ev_key = (ev["room_id"], ev["state_key"])
if ev_key in processed_events:
continue
events_result.append(ev)
try:
if (
processed_rooms[parent_room_id]
and processed_rooms[child_room_id]
):
events_result.append(ev)
except KeyError:
# Note that events are pending of the child event since if
clokep marked this conversation as resolved.
Show resolved Hide resolved
# the parent event was not accessible it shouldn't be included
clokep marked this conversation as resolved.
Show resolved Hide resolved
# at all.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really follow this logic. Why do we only add the event to the pending list for the child room?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thought process was that we only want to send the event in the case of both the parent and child room being accessible to the requester.

I think the assumption here is that if the parent room isn't in processed_rooms something terrible has happened? There are two sources of data we're iterating over:

  • events - m.space.child events from the current room.
    • For local rooms, the parent will be the current room, the child will be a room that may or may not have been processed.
    • For remote rooms, this includes the above, as well as additional events that might be children of children, etc. (Or even events that aren't part of the graph.)
  • room_pending_events - m.space.child events which were previously found to be pointing to the current room, i.e. the child room is the current room and the parent room is a previously processed room.

I'm now having trouble convincing myself that parents must be processed before children and I think that assumption might be bogus actually.

pending_events.setdefault(child_room_id, []).append(ev)

# add the child to the queue. we have already validated
# that the vias are a list of server names.
room_queue.append(
_RoomQueueEntry(ev["state_key"], ev["content"]["via"])
)
processed_events.add(ev_key)
room_queue.append(_RoomQueueEntry(child_room_id, ev["content"]["via"]))

# Before returning to the client, remove the allowed_spaces key for any
# rooms.
Expand Down Expand Up @@ -328,26 +334,30 @@ async def _summarize_local_room(

room_entry = await self._build_room_entry(room_id)

# look for child rooms/spaces.
child_events = await self._get_child_events(room_id)

if suggested_only:
# we only care about suggested children
child_events = filter(_is_suggested_child_event, child_events)

if max_children is None or max_children > MAX_ROOMS_PER_SPACE:
max_children = MAX_ROOMS_PER_SPACE

now = self._clock.time_msec()
# If the the room is a space, check if there are any children.
events_result = [] # type: List[JsonDict]
for edge_event in itertools.islice(child_events, max_children):
events_result.append(
await self._event_serializer.serialize_event(
edge_event,
time_now=now,
event_format=format_event_for_client_v2,
if room_entry.get("room_type") in (RoomTypes.SPACE, RoomTypes.MSC1772_SPACE):
clokep marked this conversation as resolved.
Show resolved Hide resolved

# look for child rooms/spaces.
child_events = await self._get_child_events(room_id)

if suggested_only:
# we only care about suggested children
child_events = filter(_is_suggested_child_event, child_events)

if max_children is None or max_children > MAX_ROOMS_PER_SPACE:
max_children = MAX_ROOMS_PER_SPACE

now = self._clock.time_msec()
for edge_event in itertools.islice(child_events, max_children):
events_result.append(
await self._event_serializer.serialize_event(
edge_event,
time_now=now,
event_format=format_event_for_client_v2,
)
)
)

return room_entry, events_result

async def _summarize_remote_room(
Expand Down Expand Up @@ -448,7 +458,6 @@ async def _is_room_accessible(
member_event_id = state_ids.get((EventTypes.Member, requester), None)

# If they're in the room they can see info on it.
member_event = None
if member_event_id:
member_event = await self._store.get_event(member_event_id)
if member_event.membership in (Membership.JOIN, Membership.INVITE):
Expand Down