From 40602c0c2975217ef8ae0743e30dd980408bcec5 Mon Sep 17 00:00:00 2001 From: Gabriel Gordon-Hall Date: Wed, 27 Mar 2024 14:29:25 +0000 Subject: [PATCH] remove analytics --- Cargo.lock | 102 +------ apps/desktop/src-tauri/src/backend.rs | 28 +- package-lock.json | 6 +- server/bleep/Cargo.toml | 1 - server/bleep/src/agent.rs | 32 --- server/bleep/src/agent/tools/answer.rs | 10 - server/bleep/src/agent/tools/code.rs | 16 +- server/bleep/src/agent/tools/path.rs | 11 - server/bleep/src/agent/tools/proc.rs | 7 - server/bleep/src/analytics.rs | 322 ----------------------- server/bleep/src/bin/bleep.rs | 2 +- server/bleep/src/indexes/analytics.rs | 68 +---- server/bleep/src/indexes/file.rs | 9 +- server/bleep/src/lib.rs | 75 +----- server/bleep/src/webserver.rs | 5 - server/bleep/src/webserver/answer.rs | 37 +-- server/bleep/src/webserver/config.rs | 16 -- server/bleep/src/webserver/docs.rs | 13 +- server/bleep/src/webserver/github.rs | 34 --- server/bleep/src/webserver/middleware.rs | 8 - server/bleep/src/webserver/repos.rs | 20 +- server/bleep/src/webserver/studio.rs | 32 --- 22 files changed, 20 insertions(+), 834 deletions(-) delete mode 100644 server/bleep/src/analytics.rs diff --git a/Cargo.lock b/Cargo.lock index a04c0657d6..43771b2d1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -290,17 +290,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -536,7 +525,6 @@ dependencies = [ "reqwest", "reqwest-eventsource", "ring 0.16.20", - "rudderanalytics", "scc", "secrecy", "select", @@ -1675,19 +1663,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -1758,28 +1733,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "failure" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" -dependencies = [ - "backtrace", - "failure_derive", -] - -[[package]] -name = "failure_derive" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "synstructure", -] - [[package]] name = "fancy-regex" version = "0.11.0" @@ -3305,15 +3258,6 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.3.3" @@ -3753,7 +3697,7 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "hermit-abi 0.3.3", + "hermit-abi", "rustix", "windows-sys 0.48.0", ] @@ -4621,7 +4565,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.3", + "hermit-abi", "libc", ] @@ -5926,21 +5870,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "rudderanalytics" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "328fcf7e15fec5dafd6cc362de92fb48e821c9f2cac54e6942a4f2cef4783ba6" -dependencies = [ - "chrono", - "env_logger", - "failure", - "log", - "reqwest", - "serde", - "serde_json", -] - [[package]] name = "rust-stemmers" version = "1.2.0" @@ -6882,18 +6811,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "unicode-xid", -] - [[package]] name = "sys-locale" version = "0.2.4" @@ -7416,15 +7333,6 @@ dependencies = [ "utf-8", ] -[[package]] -name = "termcolor" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" -dependencies = [ - "winapi-util", -] - [[package]] name = "thin-slice" version = "0.1.1" @@ -8202,12 +8110,6 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - [[package]] name = "unicode_categories" version = "0.1.1" diff --git a/apps/desktop/src-tauri/src/backend.rs b/apps/desktop/src-tauri/src/backend.rs index 134ded5323..6dab648cc9 100644 --- a/apps/desktop/src-tauri/src/backend.rs +++ b/apps/desktop/src-tauri/src/backend.rs @@ -1,7 +1,7 @@ -use bleep::{analytics, Application, Configuration, Environment}; +use bleep::{Application, Configuration, Environment}; use tracing::error; -use super::{config::get_device_id, Manager, Payload, Runtime}; +use super::{Manager, Payload, Runtime}; use std::thread; use std::time::Duration; @@ -93,19 +93,7 @@ async fn start_backend(configuration: Configuration, app: tauri::App app.manage(configuration.clone()); - let initialized = Application::initialize( - Environment::insecure_local(), - configuration, - get_device_id(), - analytics::HubOptions { - package_metadata: Some(analytics::PackageMetadata { - name: env!("CARGO_CRATE_NAME"), - version: env!("CARGO_PKG_VERSION"), - git_rev: git_version::git_version!(fallback = "unknown"), - }), - }, - ) - .await; + let initialized = Application::initialize(Environment::insecure_local(), configuration).await; match initialized { Ok(backend) => { @@ -114,16 +102,6 @@ async fn start_backend(configuration: Configuration, app: tauri::App scope.add_event_processor(move |mut event| { event.user = Some(crate::config::sentry_user()).map(|mut user| { let username = backend.username(); - - user.id = Some( - if let (Some(analytics), Some(username)) = - (&backend.analytics, &username) - { - analytics.tracking_id(Some(username)) - } else { - get_device_id() - }, - ); user.username = username; user }); diff --git a/package-lock.json b/package-lock.json index 8194f05ab4..78e8f371a8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5708,9 +5708,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001517", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001517.tgz", - "integrity": "sha512-Vdhm5S11DaFVLlyiKu4hiUTkpZu+y1KA/rZZqVQfOD5YdDT/eQKlkt7NaE0WGOFgX32diqt9MiP9CAiFeRklaA==", + "version": "1.0.30001599", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001599.tgz", + "integrity": "sha512-LRAQHZ4yT1+f9LemSMeqdMpMxZcc4RMWdj4tiFe3G8tNkWK+E58g+/tzotb5cU6TbcVJLr4fySiAW7XmxQvZQA==", "dev": true, "funding": [ { diff --git a/server/bleep/Cargo.toml b/server/bleep/Cargo.toml index c9284fc0a1..9b2e500dd1 100644 --- a/server/bleep/Cargo.toml +++ b/server/bleep/Cargo.toml @@ -130,7 +130,6 @@ quick-xml = { version = "0.29.0", features = ["serialize"] } # telemetry sentry = { version = "0.31.7", default-features = false, features = ["tracing", "contexts", "debug-images", "panic", "rustls", "reqwest"] } sentry-tracing = "0.31.7" -rudderanalytics = { version = "1.1.2", default-features = false, features = ["rustls-tls"] } # auth jsonwebtoken = { version = "8.3.0", features = ["use_pem"] } diff --git a/server/bleep/src/agent.rs b/server/bleep/src/agent.rs index 1bf397843b..fa0db605ab 100644 --- a/server/bleep/src/agent.rs +++ b/server/bleep/src/agent.rs @@ -7,7 +7,6 @@ use tracing::{debug, error, info, instrument}; use crate::{ agent::exchange::RepoPath, - analytics::{EventData, QueryEvent}, indexes::reader::{ContentDocument, FileDocument}, llm_gateway::{self, api::FunctionCall}, query::{parser, stopwords::remove_stopwords}, @@ -91,18 +90,8 @@ impl Drop for Agent { ExchangeState::Failed => {} ExchangeState::Pending => { if std::thread::panicking() { - self.track_query( - EventData::output_stage("cancelled") - .with_payload("message", "request panicked"), - ); } else { self.last_exchange_mut().apply_update(Update::SetTimestamp); - - self.track_query( - EventData::output_stage("cancelled") - .with_payload("message", "request was cancelled"), - ); - tokio::spawn(self.store()); } } @@ -139,15 +128,6 @@ impl Agent { .map_err(|_| anyhow!("exchange_tx was closed")) } - pub fn track_query(&self, data: EventData) { - let event = QueryEvent { - query_id: self.query_id, - thread_id: self.conversation.thread_id, - data, - }; - self.app.track_query(&self.user, &event); - } - fn last_exchange(&self) -> &Exchange { self.conversation .exchanges @@ -201,8 +181,6 @@ impl Agent { match &action { Action::Query(s) => { - self.track_query(EventData::input_stage("query").with_payload("q", s)); - // Always make a code search for the user query on the first exchange if self.conversation.exchanges.len() == 1 { let keywords = { @@ -270,16 +248,6 @@ impl Agent { .await .context("failed to fold LLM function call output")?; - self.track_query( - EventData::output_stage("llm_reply") - .with_payload("full_history", &history) - .with_payload("trimmed_history", &trimmed_history) - .with_payload("last_message", history.last()) - .with_payload("functions", &functions) - .with_payload("raw_response", &raw_response) - .with_payload("model", &self.llm_gateway.model), - ); - let action = Action::deserialize_gpt(&raw_response).context("failed to deserialize LLM output")?; diff --git a/server/bleep/src/agent/tools/answer.rs b/server/bleep/src/agent/tools/answer.rs index b8e48a59a0..e24df5804a 100644 --- a/server/bleep/src/agent/tools/answer.rs +++ b/server/bleep/src/agent/tools/answer.rs @@ -9,7 +9,6 @@ use crate::{ exchange::{CodeChunk, FocusedChunk, Update}, model, transcoder, Agent, }, - analytics::EventData, llm_gateway, }; @@ -86,15 +85,6 @@ impl Agent { self.update(Update::SetTimestamp).await?; - self.track_query( - EventData::output_stage("answer_article") - .with_payload("query", self.last_exchange().query()) - .with_payload("query_history", &history) - .with_payload("response", &response) - .with_payload("raw_prompt", &system_prompt) - .with_payload("model", self.answer_model.model_name), - ); - Ok(()) } diff --git a/server/bleep/src/agent/tools/code.rs b/server/bleep/src/agent/tools/code.rs index 2af1861825..548a6261a9 100644 --- a/server/bleep/src/agent/tools/code.rs +++ b/server/bleep/src/agent/tools/code.rs @@ -6,7 +6,6 @@ use crate::{ exchange::{CodeChunk, RepoPath, SearchStep, Update}, prompts, Agent, AgentSemanticSearchParams, }, - analytics::EventData, llm_gateway, query::parser::Literal, semantic::SemanticSearchParams, @@ -41,7 +40,7 @@ impl Agent { debug!("returned {} results", results.len()); - let hyde_docs = if results.len() < MINIMUM_RESULTS { + if results.len() < MINIMUM_RESULTS { info!("too few results returned, running HyDE"); let hyde_docs = self.hyde(query).await?; @@ -64,10 +63,7 @@ impl Agent { debug!("returned {} HyDE results", results.len()); results.extend(hyde_results); } - hyde_docs - } else { - vec![] - }; + } let mut chunks = results .into_iter() @@ -127,14 +123,6 @@ impl Agent { })) .await?; - self.track_query( - EventData::input_stage("semantic code search") - .with_payload("query", query) - .with_payload("hyde_queries", &hyde_docs) - .with_payload("chunks", &chunks) - .with_payload("raw_prompt", &response), - ); - Ok(response) } diff --git a/server/bleep/src/agent/tools/path.rs b/server/bleep/src/agent/tools/path.rs index 74f8c276fc..9bfd830193 100644 --- a/server/bleep/src/agent/tools/path.rs +++ b/server/bleep/src/agent/tools/path.rs @@ -8,7 +8,6 @@ use crate::{ exchange::{RepoPath, SearchStep, Update}, Agent, AgentSemanticSearchParams, }, - analytics::EventData, semantic::SemanticSearchParams, }; @@ -33,8 +32,6 @@ impl Agent { .into_iter() .collect::>(); - let is_semantic = paths.is_empty(); - // If there are no lexical results, perform a semantic search. if paths.is_empty() { let semantic_paths = self @@ -80,14 +77,6 @@ impl Agent { })) .await?; - self.track_query( - EventData::input_stage("path search") - .with_payload("query", query) - .with_payload("is_semantic", is_semantic) - .with_payload("results", &paths) - .with_payload("raw_prompt", &response), - ); - Ok(response) } } diff --git a/server/bleep/src/agent/tools/proc.rs b/server/bleep/src/agent/tools/proc.rs index 26bbc4596b..b51dc2dfb4 100644 --- a/server/bleep/src/agent/tools/proc.rs +++ b/server/bleep/src/agent/tools/proc.rs @@ -6,7 +6,6 @@ use crate::{ exchange::{CodeChunk, RepoPath, SearchStep, Update}, Agent, AgentSemanticSearchParams, }, - analytics::EventData, query::parser::Literal, semantic::SemanticSearchParams, }; @@ -93,12 +92,6 @@ impl Agent { })) .await?; - self.track_query( - EventData::input_stage("process file") - .with_payload("question", query) - .with_payload("chunks", &response), - ); - Ok(response) } } diff --git a/server/bleep/src/analytics.rs b/server/bleep/src/analytics.rs deleted file mode 100644 index e7c3565d89..0000000000 --- a/server/bleep/src/analytics.rs +++ /dev/null @@ -1,322 +0,0 @@ -use std::{fmt::Debug, sync::Arc}; - -use crate::state::{PersistedState, StateSource}; - -use rudderanalytics::{ - client::RudderAnalytics, - message::{Message, Track}, -}; -use serde::{Deserialize, Serialize}; -use serde_json::{json, Value}; -use tracing::{info, warn}; -use uuid::Uuid; - -#[derive(Debug, Clone)] -pub struct QueryEvent { - pub query_id: Uuid, - pub thread_id: Uuid, - pub data: EventData, -} - -#[derive(Debug, Clone)] -pub struct StudioEvent { - pub studio_id: i64, - - // This is not a `Map`, to prevent RudderStack from collapsing these fields into columns - // in the analytics DB. - pub payload: Vec<(String, Value)>, - pub type_: String, -} - -impl StudioEvent { - pub fn new(studio_id: i64, type_: &str) -> Self { - Self { - studio_id, - type_: type_.to_owned(), - payload: Vec::new(), - } - } - - pub fn with_payload(mut self, name: &str, payload: &T) -> Self { - self.payload.push(( - name.to_owned(), - serde_json::to_value(payload.clone()).unwrap(), - )); - self - } -} - -#[derive(Debug, Clone)] -pub struct DocEvent { - pub payload: Vec<(String, Value)>, - pub type_: String, -} - -impl DocEvent { - pub fn new(type_: &str) -> Self { - Self { - payload: vec![], - type_: type_.to_owned(), - } - } - - pub fn with_payload(mut self, name: &str, payload: &T) -> Self { - self.payload.push(( - name.to_owned(), - serde_json::to_value(payload.clone()).unwrap(), - )); - self - } -} - -#[derive(Debug, Clone)] -pub struct RepoEvent { - pub name: String, - pub payload: Vec<(String, Value)>, -} - -impl RepoEvent { - pub fn new(name: &str) -> Self { - Self { - name: name.to_owned(), - payload: vec![], - } - } - - pub fn with_payload(mut self, name: &str, payload: &T) -> Self { - self.payload.push(( - name.to_owned(), - serde_json::to_value(payload.clone()).unwrap(), - )); - self - } - - pub fn add_payload(&mut self, name: &str, payload: &T) { - self.payload.push(( - name.to_owned(), - serde_json::to_value(payload.clone()).unwrap(), - )); - } -} - -#[derive(Debug, Clone, Serialize)] -pub struct EventData { - kind: EventKind, - name: String, - - // This is not a `Map`, to prevent RudderStack from collapsing these fields into columns - // in the analytics DB. - payload: Vec<(String, Value)>, -} - -#[derive(Debug, Clone, Serialize)] -pub enum EventKind { - Input, - Output, -} - -impl EventData { - pub fn input_stage(name: &str) -> Self { - Self { - kind: EventKind::Input, - name: name.to_string(), - payload: Vec::new(), - } - } - - pub fn output_stage(name: &str) -> Self { - Self { - kind: EventKind::Output, - name: name.to_string(), - payload: Vec::new(), - } - } - - pub fn with_payload(mut self, name: &str, payload: T) -> Self { - self.payload - .push((name.to_string(), serde_json::to_value(payload).unwrap())); - self - } -} - -#[derive(Debug, serde::Serialize)] -pub struct PackageMetadata { - pub name: &'static str, - pub version: &'static str, - pub git_rev: &'static str, -} - -pub struct RudderHub { - /// Rudderstack options - options: Option, - - /// Rudderstack client - client: RudderAnalytics, - - /// User-specific store - user_store: PersistedState>, - - /// Device-specific unique identifier - device_id: PersistedState, -} - -#[derive(Default)] -pub struct HubOptions { - pub package_metadata: Option, -} - -#[derive(Serialize, Deserialize)] -pub struct DeviceId(String); - -/// User-specific configuration -#[derive(Serialize, Deserialize)] -pub struct UserState { - #[serde(default)] - pub tracking_id: String, -} - -impl RudderHub { - #[tracing::instrument(skip_all)] - pub fn new_with_options( - state: &StateSource, - device_id: impl Into>, - key: String, - data_plane: String, - options: impl Into>, - ) -> anyhow::Result> { - let client = RudderAnalytics::load(key, data_plane); - tracing::debug!("client initialized"); - - Ok(Self { - client, - options: options.into(), - user_store: state.load_or_default("user_tracking")?, - device_id: state.load_state_or("device_id", device_id.into())?, - } - .into()) - } - - pub fn device_id(&self) -> String { - self.device_id.0.trim().to_owned() - } - - pub fn tracking_id(&self, username: Option<&str>) -> String { - match username { - Some(username) => { - let id = self - .user_store - .entry(username.to_owned()) - .or_default() - .get() - .tracking_id - .clone(); - _ = self.user_store.store(); - id - } - None => self.device_id(), - } - } - - /// Send a message, logging an error if it occurs. - /// - /// This will internally `block_in_place`. - pub fn send(&self, message: Message) { - if let Err(err) = tokio::task::block_in_place(|| self.client.send(&message)) { - warn!(?err, "failed to send analytics event"); - } else { - info!("sent analytics event..."); - } - } - - pub fn track_query(&self, user: &crate::webserver::middleware::User, event: QueryEvent) { - if let Some(options) = &self.options { - self.send(Message::Track(Track { - user_id: Some(self.tracking_id(user.username())), - event: "openai query".to_owned(), - properties: Some(json!({ - "device_id": self.device_id(), - "query_id": event.query_id, - "thread_id": event.thread_id, - "data": event.data, - "package_metadata": options.package_metadata, - })), - ..Default::default() - })); - } - } - - pub fn track_studio(&self, user: &crate::webserver::middleware::User, event: StudioEvent) { - if let Some(options) = &self.options { - self.send(Message::Track(Track { - user_id: Some(self.tracking_id(user.username())), - event: "code studio".to_owned(), - properties: Some(json!({ - "device_id": self.device_id(), - "event_type": event.type_, - "studio_id": event.studio_id, - "payload": event.payload, - "package_metadata": options.package_metadata, - })), - ..Default::default() - })); - } - } - - pub fn track_doc(&self, user: &crate::webserver::middleware::User, event: DocEvent) { - if let Some(options) = &self.options { - self.send(Message::Track(Track { - user_id: Some(self.tracking_id(user.username())), - event: "doc".to_owned(), - properties: Some(json!({ - "device_id": self.device_id(), - "event_type": event.type_, - "payload": event.payload, - "package_metadata": options.package_metadata, - })), - ..Default::default() - })); - } - } - - pub fn track_synced_repos(&self, count: usize, username: Option<&str>, org_name: Option<&str>) { - self.send(Message::Track(Track { - user_id: Some(self.tracking_id(username)), - event: "track_synced_repos".into(), - properties: Some(serde_json::json!({ "count": count, "org_name": org_name })), - ..Default::default() - })); - } - - pub fn track_repo(&self, event: RepoEvent, user: &crate::webserver::middleware::User) { - self.send(Message::Track(Track { - user_id: Some(self.tracking_id(user.username())), - event: "track_repo_index".into(), - properties: Some(serde_json::json!({ - "payload": event.payload - })), - ..Default::default() - })); - } -} - -impl From> for DeviceId { - fn from(value: Option) -> Self { - match value { - Some(val) => DeviceId(val), - None => Self::default(), - } - } -} - -impl Default for DeviceId { - fn default() -> Self { - Self(uuid::Uuid::new_v4().to_string()) - } -} - -impl Default for UserState { - fn default() -> Self { - let tracking_id = uuid::Uuid::new_v4().to_string(); - Self { tracking_id } - } -} diff --git a/server/bleep/src/bin/bleep.rs b/server/bleep/src/bin/bleep.rs index d9b2182094..9e4d78b892 100644 --- a/server/bleep/src/bin/bleep.rs +++ b/server/bleep/src/bin/bleep.rs @@ -15,7 +15,7 @@ async fn main() -> Result<()> { _ = color_eyre::install(); Application::install_logging(&config); - let app = Application::initialize(Environment::server(), config, None, None).await?; + let app = Application::initialize(Environment::server(), config).await?; app.initialize_sentry(); app.run().await diff --git a/server/bleep/src/indexes/analytics.rs b/server/bleep/src/indexes/analytics.rs index aaab8a3851..a354a52182 100644 --- a/server/bleep/src/indexes/analytics.rs +++ b/server/bleep/src/indexes/analytics.rs @@ -1,5 +1,4 @@ -use crate::{analytics::RepoEvent, repo::RepoRef}; -use tokio::{sync::mpsc, time::Instant}; +use tokio::sync::mpsc; #[derive(Default)] pub struct WorkerStats { @@ -19,44 +18,28 @@ impl std::ops::AddAssign for WorkerStats { } } -#[derive(serde::Serialize, Clone, Copy, PartialEq, Eq)] -enum IndexJobKind { - Index, - PeriodicSync { reindex_file_count: usize }, - SchemaUpgrade { reindex_file_count: usize }, -} - // the main entrypoint into gathering analytics for an index job pub struct StatsGatherer { // reciever of stats from worker threads stats_rx: mpsc::UnboundedReceiver, // pass this along to each worker thread stats_tx: mpsc::UnboundedSender, - // the reporef of the target index job - reporef: RepoRef, - // the moment this job began - start_time: Instant, // set to true if this is the first index of this reporef pub is_first_index: bool, // set to true if the index was reset on startup pub was_index_reset: bool, - // gather analytics events into this `event` field - pub event: RepoEvent, // combine stats from each worker thread into `repo_stats` pub repo_stats: WorkerStats, } impl StatsGatherer { - pub fn for_repo(reporef: RepoRef) -> Self { + pub fn for_repo() -> Self { let (stats_tx, stats_rx) = mpsc::unbounded_channel(); Self { stats_rx, stats_tx, - event: RepoEvent::new("index"), - reporef, is_first_index: false, was_index_reset: false, - start_time: Instant::now(), repo_stats: WorkerStats::default(), } } @@ -72,51 +55,4 @@ impl StatsGatherer { self.repo_stats += stats; } } - - // Consume self to produce analytics event - #[rustfmt::skip] - pub fn event(mut self) -> RepoEvent { - // determine the type of index job run - // - let job_kind = if self.was_index_reset { - IndexJobKind::SchemaUpgrade { - reindex_file_count: self.repo_stats.reindex_count, - } - } else if self.is_first_index { - IndexJobKind::Index - } else { - IndexJobKind::PeriodicSync { - reindex_file_count: self.repo_stats.reindex_count, - } - }; - self.event.add_payload("reporef", &self.reporef.name()); - self.event.add_payload("provider", &self.reporef.backend()); - self.event.add_payload("index_job_kind", &job_kind); - self.event.add_payload("chunk_count", &self.repo_stats.chunks); - self.event.add_payload("bytes", &human_readable(self.repo_stats.size)); - self.event.add_payload("sync_time", &format!("{:?}", self.start_time.elapsed())); - self.event - } -} - -fn human_readable(size: usize) -> String { - let suffixes = ["B", "KB", "MB", "GB"]; - let s = suffixes - .iter() - .zip(0..10) - .rev() - .map(|(suf, exp)| (suf, size as f64 / (1024_f64.powi(exp)))) - .find(|(_, t)| t >= &1.0); - s.map(|(suffix, value)| format!("{value:.2}{suffix}")) - .unwrap_or_else(|| size.to_string()) -} - -#[cfg(test)] -mod test { - #[test] - fn human_readable() { - assert_eq!(super::human_readable(15), "15.00B"); - assert_eq!(super::human_readable(1024), "1.00KB"); - assert_eq!(super::human_readable(7616597515), "7.09GB"); - } } diff --git a/server/bleep/src/indexes/file.rs b/server/bleep/src/indexes/file.rs index 6d866d6206..85758af083 100644 --- a/server/bleep/src/indexes/file.rs +++ b/server/bleep/src/indexes/file.rs @@ -112,7 +112,7 @@ impl Indexable for File { let cache = file_cache.retrieve(reporef).await; let repo_name = reporef.indexed_name(); let processed = &AtomicU64::new(0); - let mut stats_gatherer = StatsGatherer::for_repo(reporef.clone()); + let mut stats_gatherer = StatsGatherer::for_repo(); stats_gatherer.is_first_index = cache.is_empty(); stats_gatherer.was_index_reset = app.indexes.was_index_reset; @@ -177,7 +177,6 @@ impl Indexable for File { repo.branch_filter.as_ref().map(Into::into), )?; let count = walker.len(); - stats_gatherer.event.add_payload("file_count", &count); walker.for_each(pipes, file_worker(count)); } else { let branch = gix::open::Options::isolated() @@ -198,7 +197,6 @@ impl Indexable for File { let walker = FileWalker::index_directory(&repo.disk_path, branch); let count = walker.len(); - stats_gatherer.event.add_payload("file_count", &count); walker.for_each(pipes, file_worker(count)); }; @@ -209,11 +207,6 @@ impl Indexable for File { info!(?repo.disk_path, "repo file indexing finished, took {:?}", start.elapsed()); stats_gatherer.finish().await; - if stats_gatherer.repo_stats.reindex_count > 0 { - let user = app.user().await; - let event = stats_gatherer.event(); - app.with_analytics(|hub| hub.track_repo(event, &user)); - } file_cache .synchronize(cache, |key| { diff --git a/server/bleep/src/lib.rs b/server/bleep/src/lib.rs index dac3529e7a..5b1ee4068d 100644 --- a/server/bleep/src/lib.rs +++ b/server/bleep/src/lib.rs @@ -35,7 +35,7 @@ use crate::{ background::SyncQueue, indexes::Indexes, remotes::CognitoGithubTokenBundle, semantic::Semantic, state::RepositoryPool, webserver::middleware::User, }; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use axum::extract::FromRef; use once_cell::sync::OnceCell; @@ -66,7 +66,6 @@ mod webserver; mod ee; -pub mod analytics; pub mod indexes; pub mod intelligence; pub mod periodic; @@ -119,18 +118,10 @@ pub struct Application { /// SQL database for persistent storage pub sql: SqlDb, - - /// Analytics backend -- may be unintialized - pub analytics: Option>, } impl Application { - pub async fn initialize( - env: Environment, - mut config: Configuration, - tracking_seed: impl Into>, - analytics_options: impl Into>, - ) -> Result { + pub async fn initialize(env: Environment, mut config: Configuration) -> Result { config.max_threads = config.max_threads.max(minimum_parallelism()); let threads = config.max_threads; @@ -187,15 +178,6 @@ impl Application { env }; - // Analytics backend - let analytics = match initialize_analytics(&config, tracking_seed, analytics_options) { - Ok(analytics) => Some(analytics), - Err(err) => { - warn!(?err, "failed to initialize analytics"); - None - } - }; - Ok(Self { sync_queue: SyncQueue::start(config.clone()), cookie_key: config.source.initialize_cookie_key()?, @@ -206,7 +188,6 @@ impl Application { sql, indexes, repo_pool, - analytics, semantic, config, env, @@ -305,23 +286,6 @@ impl Application { false } - fn track_query(&self, user: &User, event: &analytics::QueryEvent) { - if let Some(analytics) = self.analytics.as_ref() { - analytics.track_query(user, event.clone()); - } - } - - fn track_studio(&self, user: &User, event: analytics::StudioEvent) { - if let Some(analytics) = self.analytics.as_ref() { - analytics.track_studio(user, event); - } - } - - /// Run a closure over the current `analytics` instance, if it exists. - fn with_analytics(&self, f: impl FnOnce(&Arc) -> R) -> Option { - self.analytics.as_ref().map(f) - } - pub fn username(&self) -> Option { self.credentials.user().clone() } @@ -464,38 +428,3 @@ where Level::TRACE => EventFilter::Ignore, }) } - -#[tracing::instrument(skip_all)] -fn initialize_analytics( - config: &Configuration, - tracking_seed: impl Into>, - options: impl Into>, -) -> Result> { - debug!("creating configuration"); - - let Some(key) = &config.analytics_key else { - bail!("analytics key missing; skipping initialization"); - }; - - let Some(data_plane) = &config.analytics_data_plane else { - bail!("analytics data plane url missing; skipping initialization"); - }; - - let options = options.into().unwrap_or_else(|| analytics::HubOptions { - package_metadata: Some(analytics::PackageMetadata { - name: env!("CARGO_CRATE_NAME"), - version: env!("CARGO_PKG_VERSION"), - git_rev: git_version::git_version!(fallback = "unknown"), - }), - }); - - tokio::task::block_in_place(|| { - analytics::RudderHub::new_with_options( - &config.source, - tracking_seed, - key.clone(), - data_plane.clone(), - options, - ) - }) -} diff --git a/server/bleep/src/webserver.rs b/server/bleep/src/webserver.rs index 369a70a092..79ca17ba1c 100644 --- a/server/bleep/src/webserver.rs +++ b/server/bleep/src/webserver.rs @@ -122,7 +122,6 @@ pub async fn start(app: Application) -> anyhow::Result<()> { get(autocomplete::handle), ) .route("/projects/:project_id/search/path", get(search::fuzzy_path)) - .route("/projects/:project_id/answer/vote", post(answer::vote)) .route("/projects/:project_id/answer", get(answer::answer)) .route("/projects/:project_id/answer/explain", get(answer::explain)) .route("/projects/:project_id/studios", post(studio::create)) @@ -315,10 +314,6 @@ impl Error { }, } } - - fn message(&self) -> &str { - self.body.message.as_ref() - } } impl From for Error { diff --git a/server/bleep/src/webserver/answer.rs b/server/bleep/src/webserver/answer.rs index 50c4486d01..ec70d7103a 100644 --- a/server/bleep/src/webserver/answer.rs +++ b/server/bleep/src/webserver/answer.rs @@ -7,7 +7,7 @@ use axum::{ sse::{self, Sse}, IntoResponse, }, - Extension, Json, + Extension, }; use futures::{future::Either, stream, StreamExt}; use reqwest::StatusCode; @@ -20,7 +20,6 @@ use crate::{ exchange::{CodeChunk, Exchange, FocusedChunk, RepoPath}, Action, Agent, ExchangeState, }, - analytics::{EventData, QueryEvent}, db::QueryLog, query::parser::{self, Literal}, repo::RepoRef, @@ -45,21 +44,6 @@ pub enum VoteFeedback { Negative { feedback: String }, } -pub(super) async fn vote( - Extension(app): Extension, - Extension(user): Extension, - Json(params): Json, -) { - app.track_query( - &user, - &QueryEvent { - query_id: params.query_id, - thread_id: params.thread_id, - data: EventData::output_stage("vote").with_payload("feedback", params.feedback), - }, - ); -} - #[derive(Clone, Debug, serde::Deserialize)] pub struct Answer { pub q: String, @@ -182,17 +166,6 @@ impl AgentExecutor { if let Err(err) = response.as_ref() { error!(?err, "failed to handle /answer query"); - - self.app.track_query( - &self.user, - &QueryEvent { - query_id: self.query_id, - thread_id: self.conversation.thread_id, - data: EventData::output_stage("error") - .with_payload("status", err.status.as_u16()) - .with_payload("message", err.message()), - }, - ); } response @@ -349,17 +322,9 @@ impl AgentExecutor { } Err(agent::Error::Timeout(duration)) => { warn!("Timeout reached."); - agent.track_query( - EventData::output_stage("error") - .with_payload("timeout", duration.as_secs()), - ); Err(anyhow!("reached timeout of {duration:?}"))?; } Err(agent::Error::Processing(e)) => { - agent.track_query( - EventData::output_stage("error") - .with_payload("message", e.to_string()), - ); Err(e)?; } }; diff --git a/server/bleep/src/webserver/config.rs b/server/bleep/src/webserver/config.rs index 7b3b4dfd48..dc6019232c 100644 --- a/server/bleep/src/webserver/config.rs +++ b/server/bleep/src/webserver/config.rs @@ -11,8 +11,6 @@ pub(super) struct ConfigResponse { user_login: Option, org_name: Option, schema_version: String, - tracking_id: String, - device_id: String, github_user: Option, bloop_user_profile: UserProfile, bloop_version: String, @@ -34,18 +32,6 @@ pub(super) async fn get( let user_login = user.username().map(str::to_owned); - let tracking_id = app - .analytics - .as_ref() - .map(|a| a.tracking_id(user_login.as_deref())) - .unwrap_or_default(); - - let device_id = app - .analytics - .as_ref() - .map(|a| a.device_id()) - .unwrap_or_default(); - let org_name = app.credentials.github().and_then(|cred| match cred.auth { remotes::github::Auth::App { org, .. } => Some(org), _ => None, @@ -71,9 +57,7 @@ pub(super) async fn get( paid: user.paid_features(&app).await, user_login, github_user, - device_id, org_name, - tracking_id, }) } diff --git a/server/bleep/src/webserver/docs.rs b/server/bleep/src/webserver/docs.rs index 8a0d8138b4..9c963fcb1a 100644 --- a/server/bleep/src/webserver/docs.rs +++ b/server/bleep/src/webserver/docs.rs @@ -1,5 +1,5 @@ use axum::{ - extract::{Extension, Json, Path, Query, State}, + extract::{Json, Path, Query, State}, response::{ sse::{Event, KeepAlive}, Sse, @@ -9,9 +9,8 @@ use futures::stream::{Stream, StreamExt}; use tracing::error; use crate::{ - analytics::DocEvent, indexes::doc, - webserver::{middleware::User, Error, Result}, + webserver::{Error, Result}, Application, }; @@ -63,15 +62,7 @@ pub async fn delete(State(app): State, Path(id): Path) -> Resu pub async fn enqueue( State(app): State, Query(params): Query, - Extension(user): Extension, ) -> Result> { - app.with_analytics(|hub| { - hub.track_doc( - &user, - DocEvent::new("sync").with_payload("url", ¶ms.url), - ) - }); - Ok(Json(app.indexes.doc.clone().enqueue(params.url).await?)) } diff --git a/server/bleep/src/webserver/github.rs b/server/bleep/src/webserver/github.rs index c3db8dff4a..8a06ca4cba 100644 --- a/server/bleep/src/webserver/github.rs +++ b/server/bleep/src/webserver/github.rs @@ -163,40 +163,6 @@ async fn poll_for_oauth_token(code: String, app: Application) { crate::periodic::validate_github_credentials(&app).await; crate::periodic::update_repo_list(&app).await; - let username = app - .credentials - .github() - .unwrap() - .client() - .unwrap() - .current() - .user() - .await - .unwrap() - .login; - - let tracking_id = app - .analytics - .as_ref() - .map(|a| a.tracking_id(Some(&username))) - .unwrap_or_default(); - - let user = app.user().await; - - app.with_analytics(|analytics| { - use rudderanalytics::message::{Identify, Message}; - analytics.send(Message::Identify(Identify { - user_id: Some(tracking_id.clone()), - traits: Some(serde_json::json!({ - "org_name": user.org_name(), - "device_id": analytics.device_id(), - "is_self_serve": app.env.is_cloud_instance(), - "github_username": username, - })), - ..Default::default() - })); - }); - if let Err(err) = app.credentials.store() { error!(?err, "failed to save credentials to disk"); } diff --git a/server/bleep/src/webserver/middleware.rs b/server/bleep/src/webserver/middleware.rs index 9459837fdd..c761fd5140 100644 --- a/server/bleep/src/webserver/middleware.rs +++ b/server/bleep/src/webserver/middleware.rs @@ -40,14 +40,6 @@ impl User { } } - pub(crate) fn org_name(&self) -> Option<&str> { - let User::Cloud { org_name, .. } = self else { - return None; - }; - - Some(org_name.as_ref()) - } - pub(crate) fn github_client(&self) -> Option { let crab = match self { User::Unknown => return None, diff --git a/server/bleep/src/webserver/repos.rs b/server/bleep/src/webserver/repos.rs index 636cb9d86b..0b3d673be1 100644 --- a/server/bleep/src/webserver/repos.rs +++ b/server/bleep/src/webserver/repos.rs @@ -15,7 +15,7 @@ use axum::{ use chrono::{DateTime, NaiveDateTime, Utc}; use serde::{Deserialize, Serialize}; -use super::{middleware::User, prelude::*}; +use super::prelude::*; #[derive(Serialize, Debug, PartialEq, Eq)] pub(crate) struct Branch { @@ -307,17 +307,10 @@ pub(super) async fn get_by_id( pub(super) async fn delete_by_id( Query(RepoParams { repo, .. }): Query, State(app): State, - Extension(user): Extension, ) -> Result { // TODO: We can refactor `repo_pool` to also hold queued repos, instead of doing a calculation // like this which is prone to timing issues. - let num_repos = app.repo_pool.len(); let found = app.write_index().remove(repo).await.is_some(); - let num_deleted = if found { 1 } else { 0 }; - - app.with_analytics(|analytics| { - analytics.track_synced_repos(num_repos - num_deleted, user.username(), user.org_name()); - }); if found { Ok(json(ReposResponse::Deleted)) @@ -330,19 +323,13 @@ pub(super) async fn delete_by_id( pub(super) async fn sync( Query(RepoParams { repo, shallow }): Query, State(app): State, - Extension(user): Extension, ) -> Result { // TODO: We can refactor `repo_pool` to also hold queued repos, instead of doing a calculation // like this which is prone to timing issues. - let num_repos = app.repo_pool.len(); app.write_index() .enqueue(SyncConfig::new(app.clone(), repo).shallow(shallow)) .await; - app.with_analytics(|analytics| { - analytics.track_synced_repos(num_repos + 1, user.username(), user.org_name()); - }); - Ok(json(ReposResponse::SyncQueued)) } @@ -407,15 +394,10 @@ pub(super) struct SetIndexed { // pub(super) async fn set_indexed( State(app): State, - Extension(user): Extension, Json(new_list): Json, ) -> impl IntoResponse { let mut repo_list = new_list.indexed.into_iter().collect::>(); - app.with_analytics(|analytics| { - analytics.track_synced_repos(repo_list.len(), user.username(), user.org_name()); - }); - app.repo_pool .for_each_async(|k, existing| { if !repo_list.contains(k) { diff --git a/server/bleep/src/webserver/studio.rs b/server/bleep/src/webserver/studio.rs index 42bdf3d98a..d48cf86923 100644 --- a/server/bleep/src/webserver/studio.rs +++ b/server/bleep/src/webserver/studio.rs @@ -24,7 +24,6 @@ use self::diff::{DiffChunk, DiffHunk}; use super::{middleware::User, Error}; use crate::{ agent::{exchange::Exchange, prompts}, - analytics::StudioEvent, llm_gateway, repo::RepoRef, webserver, Application, @@ -738,14 +737,6 @@ pub async fn generate( let doc_context = serde_json::from_str::>(&doc_context_json).map_err(Error::internal)?; - app.track_studio( - &user, - StudioEvent::new(studio_id, "generate") - .with_payload("context", &context) - .with_payload("doc_context", &doc_context) - .with_payload("messages", &messages), - ); - let llm_context = generate_llm_context((*app).clone(), &context, &doc_context).await?; let system_prompt = prompts::studio_article_prompt(&llm_context); let llm_messages = iter::once(llm_gateway::api::Message::system(&system_prompt)) @@ -765,12 +756,6 @@ pub async fn generate( yield response.clone(); } - app.track_studio( - &user, - StudioEvent::new(studio_id, "generate_complete") - .with_payload("response", &response) - ); - messages.push(Message::Assistant(response)); let messages_json = serde_json::to_string(&messages).unwrap(); @@ -995,14 +980,6 @@ pub async fn diff( }) .context("studio did not contain an assistant message")?; - app.track_studio( - &user, - StudioEvent::new(studio_id, "diff") - .with_payload("context", &context) - .with_payload("user_message", &user_message) - .with_payload("assistant_message", &assistant_message), - ); - let llm_context = generate_llm_context((*app).clone(), &context, &[]).await?; let system_prompt = prompts::studio_diff_prompt(&llm_context); @@ -1362,7 +1339,6 @@ pub async fn diff_apply( shallow: false, }), app.clone(), - user.clone(), ) .await?; } @@ -1535,14 +1511,6 @@ pub async fn import( .execute(&mut transaction) .await?; - app.track_studio( - &user, - StudioEvent::new(studio_id, "import") - .with_payload("thread_id", ¶ms.thread_id) - .with_payload("old_context", &old_context) - .with_payload("new_context", &new_context), - ); - transaction.commit().await?; Ok(studio_id.to_string())