diff --git a/src/VisualStudio/Core/Next/Remote/ServiceHubRemoteHostClient.cs b/src/VisualStudio/Core/Next/Remote/ServiceHubRemoteHostClient.cs index fba8c2154ca86..6564785a22fb3 100644 --- a/src/VisualStudio/Core/Next/Remote/ServiceHubRemoteHostClient.cs +++ b/src/VisualStudio/Core/Next/Remote/ServiceHubRemoteHostClient.cs @@ -52,34 +52,61 @@ public static async Task CreateAsync( Workspace workspace, CancellationToken cancellationToken) { using (Logger.LogBlock(FunctionId.ServiceHubRemoteHostClient_CreateAsync, cancellationToken)) + { + var primary = new HubClient("ManagedLanguage.IDE.RemoteHostClient"); + var timeout = TimeSpan.FromMilliseconds(workspace.Options.GetOption(RemoteHostOptions.RequestServiceTimeoutInMS)); + + // Retry (with timeout) until we can connect to RemoteHost (service hub process). + // we are seeing cases where we failed to connect to service hub process when a machine is under heavy load. + // (see https://devdiv.visualstudio.com/DevDiv/_workitems/edit/481103 as one of example) + var instance = await RetryRemoteCallAsync( + () => CreateWorkerAsync(workspace, primary, timeout, cancellationToken), timeout, cancellationToken).ConfigureAwait(false); + + instance.Started(); + + // Create a workspace host to hear about workspace changes. We'll + // remote those changes over to the remote side when they happen. + await RegisterWorkspaceHostAsync(workspace, instance).ConfigureAwait(false); + + // return instance + return instance; + } + } + + public static async Task CreateWorkerAsync(Workspace workspace, HubClient primary, TimeSpan timeout, CancellationToken cancellationToken) + { + ServiceHubRemoteHostClient client = null; + try { // let each client to have unique id so that we can distinguish different clients when service is restarted var currentInstanceId = Interlocked.Add(ref s_instanceId, 1); - var primary = new HubClient("ManagedLanguage.IDE.RemoteHostClient"); var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})"; var hostGroup = new HostGroup(current); - var timeout = TimeSpan.FromMilliseconds(workspace.Options.GetOption(RemoteHostOptions.RequestServiceTimeoutInMS)); var remoteHostStream = await RequestServiceAsync(primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false); - var remotableDataRpc = new RemotableDataJsonRpc(workspace, primary.Logger, await RequestServiceAsync(primary, WellKnownServiceHubServices.SnapshotService, hostGroup, timeout, cancellationToken).ConfigureAwait(false)); - var instance = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, new ReferenceCountedDisposable(remotableDataRpc), remoteHostStream); + var remotableDataRpc = new RemotableDataJsonRpc( + workspace, primary.Logger, await RequestServiceAsync(primary, WellKnownServiceHubServices.SnapshotService, hostGroup, timeout, cancellationToken).ConfigureAwait(false)); + client = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, new ReferenceCountedDisposable(remotableDataRpc), remoteHostStream); // make sure connection is done right - var host = await instance._rpc.InvokeAsync(nameof(IRemoteHostService.Connect), current, TelemetryService.DefaultSession.SerializeSettings()).ConfigureAwait(false); - - // TODO: change this to non fatal watson and make VS to use inproc implementation - Contract.ThrowIfFalse(host == current.ToString()); + var host = await client._rpc.InvokeWithCancellationAsync( + nameof(IRemoteHostService.Connect), new object[] { current, TelemetryService.DefaultSession.SerializeSettings() }, cancellationToken).ConfigureAwait(false); - instance.Started(); + return client; + } + catch (Exception ex) + { + // make sure we shutdown client if initializing client has failed. + client?.Shutdown(); - // Create a workspace host to hear about workspace changes. We'll - // remote those changes over to the remote side when they happen. - await RegisterWorkspaceHostAsync(workspace, instance).ConfigureAwait(false); + // translate to our own cancellation if it is raised. + cancellationToken.ThrowIfCancellationRequested(); - // return instance - return instance; + // otherwise, report watson and throw original exception + ex.ReportServiceHubNFW("ServiceHub creation failed"); + throw; } } @@ -263,6 +290,40 @@ private void OnRpcDisconnected(object sender, JsonRpcDisconnectedEventArgs e) Stopped(); } + /// + /// call and retry up to if the call throws + /// . any other exception from the call won't be handled here. + /// + private static async Task RetryRemoteCallAsync( + Func> funcAsync, + TimeSpan timeout, + CancellationToken cancellationToken) where TException : Exception + { + const int retry_delayInMS = 50; + + var start = DateTime.UtcNow; + while (DateTime.UtcNow - start < timeout) + { + cancellationToken.ThrowIfCancellationRequested(); + + try + { + return await funcAsync().ConfigureAwait(false); + } + catch (TException) + { + // throw cancellation token if operation is cancelled + cancellationToken.ThrowIfCancellationRequested(); + } + + // wait for retry_delayInMS before next try + await Task.Delay(retry_delayInMS, cancellationToken).ConfigureAwait(false); + } + + // operation timed out, more than we are willing to wait + throw new TimeoutException("RequestServiceAsync timed out"); + } + private static async Task RequestServiceAsync( HubClient client, string serviceName, @@ -283,7 +344,17 @@ private static async Task RequestServiceAsync( { try { - return await RequestServiceAsync(client, descriptor, timeout, cancellationToken).ConfigureAwait(false); + // we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves. + // we have bug opened to track the issue. + // https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit + + // retry on cancellation token since HubClient will throw its own cancellation token + // when it couldn't connect to service hub service for some reasons + // (ex, OOP process GC blocked and not responding to request) + return await RetryRemoteCallAsync( + () => client.RequestServiceAsync(descriptor, cancellationToken), + timeout, + cancellationToken).ConfigureAwait(false); } catch (RemoteInvocationException ex) { @@ -310,40 +381,5 @@ private static async Task RequestServiceAsync( // unreachable throw ExceptionUtilities.Unreachable; } - - private static async Task RequestServiceAsync(HubClient client, ServiceDescriptor descriptor, TimeSpan timeout, CancellationToken cancellationToken = default) - { - // we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves. - // we have bug opened to track the issue. - // https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit - const int retry_delayInMS = 50; - - var start = DateTime.UtcNow; - while (start - DateTime.UtcNow < timeout) - { - cancellationToken.ThrowIfCancellationRequested(); - - try - { - return await client.RequestServiceAsync(descriptor, cancellationToken).ConfigureAwait(false); - } - catch (OperationCanceledException) - { - // if it is our own cancellation token, then rethrow - // otherwise, let us retry. - // - // we do this since HubClient itself can throw its own cancellation token - // when it couldn't connect to service hub service for some reasons - // (ex, OOP process GC blocked and not responding to request) - cancellationToken.ThrowIfCancellationRequested(); - } - - // wait for retry_delayInMS before next try - await Task.Delay(retry_delayInMS, cancellationToken).ConfigureAwait(false); - } - - // request service to HubClient timed out, more than we are willing to wait - throw new TimeoutException("RequestServiceAsync timed out"); - } } } diff --git a/src/VisualStudio/Core/Test.Next/Services/ServiceHubServicesTests.cs b/src/VisualStudio/Core/Test.Next/Services/ServiceHubServicesTests.cs index 99cc07a1e4b47..e5214949139b6 100644 --- a/src/VisualStudio/Core/Test.Next/Services/ServiceHubServicesTests.cs +++ b/src/VisualStudio/Core/Test.Next/Services/ServiceHubServicesTests.cs @@ -37,7 +37,7 @@ public void TestRemoteHostConnect() var remoteHostService = CreateService(); var input = "Test"; - var output = remoteHostService.Connect(input, serializedSession: null); + var output = remoteHostService.Connect(input, serializedSession: null, cancellationToken: CancellationToken.None); Assert.Equal(input, output); } diff --git a/src/Workspaces/Core/Portable/Remote/IRemoteHostService.cs b/src/Workspaces/Core/Portable/Remote/IRemoteHostService.cs index ba07d417c230d..74fd1af4397c6 100644 --- a/src/Workspaces/Core/Portable/Remote/IRemoteHostService.cs +++ b/src/Workspaces/Core/Portable/Remote/IRemoteHostService.cs @@ -8,7 +8,7 @@ namespace Microsoft.CodeAnalysis.Remote { internal interface IRemoteHostService { - string Connect(string host, string serializedSession); + string Connect(string host, string serializedSession, CancellationToken cancellationToken); Task SynchronizePrimaryWorkspaceAsync(Checksum checksum, CancellationToken cancellationToken); Task SynchronizeGlobalAssetsAsync(Checksum[] checksums, CancellationToken cancellationToken); diff --git a/src/Workspaces/Remote/ServiceHub/Services/RemoteHostService.cs b/src/Workspaces/Remote/ServiceHub/Services/RemoteHostService.cs index 9930e3e2e1c20..20188e4ea57c2 100644 --- a/src/Workspaces/Remote/ServiceHub/Services/RemoteHostService.cs +++ b/src/Workspaces/Remote/ServiceHub/Services/RemoteHostService.cs @@ -41,11 +41,6 @@ static RemoteHostService() // we set up logger here RoslynLogger.SetLogger(new EtwLogger(GetLoggingChecker())); - // Set this process's priority BelowNormal. - // this should let us to freely try to use all resources possible without worrying about affecting - // host's work such as responsiveness or build. - Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal; - SetNativeDllSearchDirectories(); } @@ -56,10 +51,12 @@ public RemoteHostService(Stream stream, IServiceProvider serviceProvider) : Rpc.StartListening(); } - public string Connect(string host, string serializedSession) + public string Connect(string host, string serializedSession, CancellationToken cancellationToken) { return RunService(() => { + cancellationToken.ThrowIfCancellationRequested(); + _primaryInstance = InstanceId; var existing = Interlocked.CompareExchange(ref _host, host, null); @@ -74,8 +71,17 @@ public string Connect(string host, string serializedSession) // log telemetry that service hub started RoslynLogger.Log(FunctionId.RemoteHost_Connect, KeyValueLogMessage.Create(SetSessionInfo)); + // serializedSession will be null for testing + if (serializedSession != null) + { + // Set this process's priority BelowNormal. + // this should let us to freely try to use all resources possible without worrying about affecting + // host's work such as responsiveness or build. + Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal; + } + return _host; - }, CancellationToken.None); + }, cancellationToken); } public Task SynchronizePrimaryWorkspaceAsync(Checksum checksum, CancellationToken cancellationToken)