From f02efe79fb1f1ed52883228f12aa0e9fdfce664a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20M=C3=BCller?= Date: Fri, 11 Feb 2022 08:54:56 +0100 Subject: [PATCH] Remove superfluous `cudaStreamSynchronize()` calls (a device-to-host `cudaMemcpyAsync()` into pageable host memory returns only once the copy has completed, so the extra synchronization is redundant as long as the destination buffers are not pinned) --- include/tiny-cuda-nn/optimizers/shampoo.h | 1 - include/tiny-cuda-nn/reduce_sum.h | 1 - 2 files changed, 2 deletions(-) diff --git a/include/tiny-cuda-nn/optimizers/shampoo.h b/include/tiny-cuda-nn/optimizers/shampoo.h index 59751813..cbf4d377 100644 --- a/include/tiny-cuda-nn/optimizers/shampoo.h +++ b/include/tiny-cuda-nn/optimizers/shampoo.h @@ -644,7 +644,6 @@ class ShampooOptimizer : public Optimizer { }); CUDA_CHECK_THROW(cudaMemcpyAsync(delta.data(), sum_tmp, n_matrices * sizeof(ROOT_TYPE), cudaMemcpyDeviceToHost, stream)); - CUDA_CHECK_THROW(cudaStreamSynchronize(stream)); if (std::any_of(std::begin(delta), std::end(delta), [](ROOT_TYPE v) { return !std::isfinite(v); })) { std::cout << "Failed to converge! " << delta[0] << std::endl; diff --git a/include/tiny-cuda-nn/reduce_sum.h b/include/tiny-cuda-nn/reduce_sum.h index 617c6de1..65a42bbd 100644 --- a/include/tiny-cuda-nn/reduce_sum.h +++ b/include/tiny-cuda-nn/reduce_sum.h @@ -149,7 +149,6 @@ float reduce_sum(T* device_pointer, F fun, uint32_t n_elements, cudaStream_t str float sum; CUDA_CHECK_THROW(cudaMemcpyAsync(&sum, workspace_data, sizeof(float), cudaMemcpyDeviceToHost, stream)); - CUDA_CHECK_THROW(cudaStreamSynchronize(stream)); return sum; }