From 54ee323e59e87963e7e50699f9a84b7b769d5429 Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 10 Aug 2023 10:47:26 -0700 Subject: [PATCH] Fix duplicate documents with Slack connector --- backend/danswer/connectors/slack/connector.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/danswer/connectors/slack/connector.py b/backend/danswer/connectors/slack/connector.py index 052a8705596..88bfe602d74 100644 --- a/backend/danswer/connectors/slack/connector.py +++ b/backend/danswer/connectors/slack/connector.py @@ -168,11 +168,17 @@ def get_all_docs( client=client, channel=channel, oldest=oldest, latest=latest ) + seen_thread_ts: set[str] = set() for message_batch in channel_message_batches: for message in message_batch: filtered_thread: ThreadType | None = None thread_ts = message.get("thread_ts") if thread_ts: + # skip threads we've already seen, since we've already processed all + # messages in that thread + if thread_ts in seen_thread_ts: + continue + seen_thread_ts.add(thread_ts) thread = get_thread( client=client, channel_id=channel["id"], thread_id=thread_ts )