Skip to content

Commit

Permalink
Remove superfluous cudaStreamSynchronize() calls
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom94 committed Feb 11, 2022
1 parent d829746 commit f02efe7
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 2 deletions.
1 change: 0 additions & 1 deletion include/tiny-cuda-nn/optimizers/shampoo.h
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,6 @@ class ShampooOptimizer : public Optimizer<T> {
});

CUDA_CHECK_THROW(cudaMemcpyAsync(delta.data(), sum_tmp, n_matrices * sizeof(ROOT_TYPE), cudaMemcpyDeviceToHost, stream));
-CUDA_CHECK_THROW(cudaStreamSynchronize(stream));

if (std::any_of(std::begin(delta), std::end(delta), [](ROOT_TYPE v) { return !std::isfinite(v); })) {
std::cout << "Failed to converge! " << delta[0] << std::endl;
Expand Down
1 change: 0 additions & 1 deletion include/tiny-cuda-nn/reduce_sum.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ float reduce_sum(T* device_pointer, F fun, uint32_t n_elements, cudaStream_t str

float sum;
CUDA_CHECK_THROW(cudaMemcpyAsync(&sum, workspace_data, sizeof(float), cudaMemcpyDeviceToHost, stream));
-CUDA_CHECK_THROW(cudaStreamSynchronize(stream));
return sum;
}

Expand Down

0 comments on commit f02efe7

Please sign in to comment.