Remove CUDA 9.2 references conditionals and workarounds (pytorch#65070)
Summary:
Title says it all

Pull Request resolved: pytorch#65070

Reviewed By: malfet

Differential Revision: D30966464

Pulled By: janeyx99

fbshipit-source-id: e454906fd5d7d321d390939ba5d237e1d9b150f8
janeyx99 authored and facebook-github-bot committed Sep 17, 2021
1 parent 51e12f0 commit 1ee66a5
Showing 8 changed files with 54 additions and 120 deletions.
3 changes: 1 addition & 2 deletions aten/src/ATen/native/cuda/BatchLinearAlgebraLib.h
@@ -7,8 +7,7 @@
#include <ATen/native/LinearAlgebraUtils.h>
#include <ATen/native/cuda/MiscUtils.h>

-#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION) && CUSOLVER_VERSION >= 10200
-// some cusolver functions don't work well on cuda 9.2 or cuda 10.1.105, cusolver is used on cuda >= 10.1.243
+#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION)
#define USE_CUSOLVER
#endif

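For orientation, a minimal standalone sketch (not part of this commit; the dispatch function and messages are invented) of how code keys off USE_CUSOLVER now that the macro only requires the CUDA runtime and cuSOLVER headers to be present, with no version arithmetic:

#include <cstdio>

// Reproduces the guard from the header above only to keep the sketch
// self-contained; in ATen the macro comes from BatchLinearAlgebraLib.h.
#if defined(CUDART_VERSION) && defined(CUSOLVER_VERSION)
#define USE_CUSOLVER
#endif

void inverse_dispatch() {
#ifdef USE_CUSOLVER
  std::printf("dispatching to the cuSOLVER-backed implementation\n");
#else
  std::printf("falling back to the MAGMA/CPU path\n");
#endif
}

int main() {
  inverse_dispatch();
  return 0;
}
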
4 changes: 0 additions & 4 deletions c10/util/Optional.cpp
@@ -3,9 +3,6 @@

#include <type_traits>

-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
static_assert(
C10_IS_TRIVIALLY_COPYABLE(c10::optional<int>),
"c10::optional<int> should be trivially copyable");
@@ -18,4 +15,3 @@ static_assert(
static_assert(
sizeof(c10::optional<c10::IntArrayRef>) == sizeof(c10::IntArrayRef),
"c10::optional<IntArrayRef> should be size-optimized");
-#endif
41 changes: 0 additions & 41 deletions c10/util/Optional.h
@@ -499,9 +499,6 @@ template <typename T>
struct is_arrayref<c10::ArrayRef<T>> : std::true_type {};
} // namespace detail_

-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
template <class T>
using OptionalBase = std::conditional_t<
detail_::is_arrayref<T>::value,
@@ -524,23 +521,9 @@ using OptionalBase = std::conditional_t<
// trivial
// destructor
optional_base<std::remove_const_t<T>>>>>;
-#else
-template <class T>
-using OptionalBase = std::conditional_t<
-detail_::is_arrayref<T>::value,
-arrayref_optional_base<T>,
-std::conditional_t<
-std::is_trivially_destructible<T>::value, // if possible
-constexpr_optional_base<std::remove_const_t<T>>, // use base with
-// trivial destructor
-optional_base<std::remove_const_t<T>>>>;
-#endif

template <class T>
class optional : private OptionalBase<T> {
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
template <class U> // re-declaration for nvcc on Windows.
using OptionalBase = std::conditional_t<
detail_::is_arrayref<U>::value,
@@ -565,17 +548,6 @@ class optional : private OptionalBase<T> {
// trivial
// destructor
optional_base<std::remove_const_t<U>>>>>;
-#else
-template <class U>
-using OptionalBase = std::conditional_t<
-detail_::is_arrayref<U>::value,
-arrayref_optional_base<U>,
-std::conditional_t<
-std::is_trivially_destructible<U>::value, // if possible
-constexpr_optional_base<std::remove_const_t<U>>, // use base with
-// trivial destructor
-optional_base<std::remove_const_t<U>>>>;
-#endif

static_assert(
!std::is_same<typename std::decay<T>::type, nullopt_t>::value,
@@ -634,20 +606,7 @@ class optional : private OptionalBase<T> {
constexpr optional(nullopt_t) noexcept : OptionalBase<T>(){};

optional(const optional& rhs) = default;
-
-// CUDA 9.2 and below fail while trying to compile default move constructor
-// see https://github.com/pytorch/csprng/issues/84
-#if (!defined(__CUDA_ARCH__) || !defined(CUDA_VERSION) || CUDA_VERSION > 9200)
optional(optional&& rhs) = default;
-#else
-optional(optional&& rhs) noexcept(
-std::is_nothrow_move_constructible<T>::value) {
-if (rhs.initialized()) {
-::new (static_cast<void*>(dataptr())) T(std::move(*rhs));
-OptionalBase<T>::setInitialized(true);
-}
-}
-#endif

// see https://github.com/akrzemi1/Optional/issues/16
// and https://en.cppreference.com/w/cpp/utility/optional/optional,
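As a standalone illustration of what the deleted guards were protecting (written against std::optional and C++17, so nothing below is taken from the PyTorch sources): on toolchains newer than CUDA 9.2 the defaulted move constructor compiles, and an optional of a trivially copyable T stays trivially copyable, which is what the now-unconditional static_asserts in Optional.cpp verify for c10::optional.

#include <optional>
#include <type_traits>
#include <utility>

// Mirrors the spirit of the checks kept in c10/util/Optional.cpp.
static_assert(std::is_trivially_copyable<std::optional<int>>::value,
              "optional<int> should be trivially copyable when int is");

int main() {
  std::optional<int> a = 1;
  std::optional<int> b = std::move(a);  // defaulted move constructor, no
                                        // CUDA 9.2 workaround needed
  return b.value_or(0) == 1 ? 0 : 1;
}
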
6 changes: 0 additions & 6 deletions caffe2/core/operator.h
@@ -731,14 +731,8 @@ inline vector<int16_t> OperatorBase::GetVectorFromIValueList<int16_t>(

// OP_SINGLE_ARG provides a shorter initialization choice for initialization of
// member variables for the class constructors.
-// This is a workaround for CUDA9.2 and GCC7
-#if defined(CUDART_VERSION) && CUDART_VERSION >= 9020 && __GNUC__ >= 7
-#define OP_SINGLE_ARG(type, name, variable, default) \
-variable(this->template GetSingleArgument<type>(name, (default)))
-#else
#define OP_SINGLE_ARG(type, name, variable, default) \
variable(OperatorBase::GetSingleArgument<type>(name, (default)))
-#endif

// INPUT_TAGS and OUTPUT_TAGS are optional features to name the indices of the
// operator's inputs and outputs, in order to avoid confusion. For example, for
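A self-contained usage sketch of the OP_SINGLE_ARG pattern (the MiniOperatorBase class, ClipOp operator, and argument names are invented stand-ins for the Caffe2 machinery, not code from this commit): the macro expands to a member-initializer, so an operator constructor can read its arguments directly in the initializer list.

#include <iostream>
#include <map>
#include <string>
#include <utility>

// Stand-in for OperatorBase; the real macro forwards to
// OperatorBase::GetSingleArgument<T>(name, default).
struct MiniOperatorBase {
  std::map<std::string, float> args;
  template <typename T>
  T GetSingleArgument(const std::string& name, const T& default_value) const {
    auto it = args.find(name);
    return it == args.end() ? default_value : static_cast<T>(it->second);
  }
};

#define OP_SINGLE_ARG(type, name, variable, default) \
  variable(MiniOperatorBase::GetSingleArgument<type>(name, (default)))

struct ClipOp : MiniOperatorBase {
  explicit ClipOp(std::map<std::string, float> a)
      : MiniOperatorBase{std::move(a)},
        OP_SINGLE_ARG(float, "min", min_, 0.f),
        OP_SINGLE_ARG(float, "max", max_, 1.f) {}
  float min_;
  float max_;
};

int main() {
  ClipOp op({{"max", 6.f}});
  std::cout << op.min_ << " " << op.max_ << "\n";  // prints "0 6"
  return 0;
}
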
98 changes: 46 additions & 52 deletions test/cpp/jit/test_gpu.cpp
@@ -2606,44 +2606,40 @@ TEST(NVFuserTest, FusionUnaryOps_CUDA) {
using OpTuple =
std::tuple<at::Tensor (*)(const at::Tensor&), UnaryOpType, std::string>;

-// [Note: explicit tuple type for uniform initialization list]
-// Tuple type must be explicitly specified for each uniform initialization
-// list within the vector to make this code compatible with some old env
-// which we still need to support. eg. gcc 5.4 + cuda 9.2.
std::vector<OpTuple> ops{
-OpTuple{at::abs, UnaryOpType::Abs, "abs"},
-OpTuple{at::acos, UnaryOpType::Acos, "acos"},
-OpTuple{at::asin, UnaryOpType::Asin, "asin"},
-OpTuple{at::atan, UnaryOpType::Atan, "atan"},
+{at::abs, UnaryOpType::Abs, "abs"},
+{at::acos, UnaryOpType::Acos, "acos"},
+{at::asin, UnaryOpType::Asin, "asin"},
+{at::atan, UnaryOpType::Atan, "atan"},
// There does not appear to be an appropriate ATen function for atanh
-// OpTuple{at::atanh, UnaryOpType::Atanh, "atanh" },
-OpTuple{at::ceil, UnaryOpType::Ceil, "ceil"},
-OpTuple{at::cos, UnaryOpType::Cos, "cos"},
-OpTuple{at::cosh, UnaryOpType::Cosh, "cosh"},
-OpTuple{at::erf, UnaryOpType::Erf, "erf"},
-OpTuple{at::erfc, UnaryOpType::Erfc, "erfc"},
-OpTuple{at::exp, UnaryOpType::Exp, "exp"},
-OpTuple{at::expm1, UnaryOpType::Expm1, "expm1"},
-OpTuple{at::floor, UnaryOpType::Floor, "floor"},
-OpTuple{at::frac, UnaryOpType::Frac, "frac"},
-OpTuple{at::gelu, UnaryOpType::Gelu, "gelu"},
-OpTuple{at::lgamma, UnaryOpType::Lgamma, "lgamma"},
-OpTuple{at::log, UnaryOpType::Log, "log"},
-OpTuple{at::log10, UnaryOpType::Log10, "log10"},
-OpTuple{at::log1p, UnaryOpType::Log1p, "log1p"},
-OpTuple{at::log2, UnaryOpType::Log2, "log2"},
-OpTuple{at::neg, UnaryOpType::Neg, "neg"},
-OpTuple{at::reciprocal, UnaryOpType::Reciprocal, "reciprocal"},
-OpTuple{at::relu, UnaryOpType::Relu, "relu"},
-OpTuple{at::round, UnaryOpType::Round, "round"},
-OpTuple{at::rsqrt, UnaryOpType::Rsqrt, "rsqrt"},
-OpTuple{at::sigmoid, UnaryOpType::Sigmoid, "sigmoid"},
-OpTuple{at::sin, UnaryOpType::Sin, "sin"},
-OpTuple{at::sinh, UnaryOpType::Sinh, "sinh"},
-OpTuple{at::sqrt, UnaryOpType::Sqrt, "sqrt"},
-OpTuple{at::tan, UnaryOpType::Tan, "tan"},
-OpTuple{at::tanh, UnaryOpType::Tanh, "tanh"},
-OpTuple{at::trunc, UnaryOpType::Trunc, "trunc"}};
+// {at::atanh, UnaryOpType::Atanh, "atanh" },
+{at::ceil, UnaryOpType::Ceil, "ceil"},
+{at::cos, UnaryOpType::Cos, "cos"},
+{at::cosh, UnaryOpType::Cosh, "cosh"},
+{at::erf, UnaryOpType::Erf, "erf"},
+{at::erfc, UnaryOpType::Erfc, "erfc"},
+{at::exp, UnaryOpType::Exp, "exp"},
+{at::expm1, UnaryOpType::Expm1, "expm1"},
+{at::floor, UnaryOpType::Floor, "floor"},
+{at::frac, UnaryOpType::Frac, "frac"},
+{at::gelu, UnaryOpType::Gelu, "gelu"},
+{at::lgamma, UnaryOpType::Lgamma, "lgamma"},
+{at::log, UnaryOpType::Log, "log"},
+{at::log10, UnaryOpType::Log10, "log10"},
+{at::log1p, UnaryOpType::Log1p, "log1p"},
+{at::log2, UnaryOpType::Log2, "log2"},
+{at::neg, UnaryOpType::Neg, "neg"},
+{at::reciprocal, UnaryOpType::Reciprocal, "reciprocal"},
+{at::relu, UnaryOpType::Relu, "relu"},
+{at::round, UnaryOpType::Round, "round"},
+{at::rsqrt, UnaryOpType::Rsqrt, "rsqrt"},
+{at::sigmoid, UnaryOpType::Sigmoid, "sigmoid"},
+{at::sin, UnaryOpType::Sin, "sin"},
+{at::sinh, UnaryOpType::Sinh, "sinh"},
+{at::sqrt, UnaryOpType::Sqrt, "sqrt"},
+{at::tan, UnaryOpType::Tan, "tan"},
+{at::tanh, UnaryOpType::Tanh, "tanh"},
+{at::trunc, UnaryOpType::Trunc, "trunc"}};

std::for_each(ops.begin(), ops.end(), [](OpTuple& op) {
test_op(
@@ -2680,14 +2676,13 @@ TEST(NVFuserTest, FusionBinaryOps_CUDA) {
using AtenFuncSig = at::Tensor (*)(const at::Tensor&, const at::Tensor&);
using OpTuple = std::tuple<AtenFuncSig, BinaryOpType, std::string>;

-// see [Note: explicit tuple type for uniform initialization list]
std::vector<OpTuple> logic_ops{
-OpTuple{at::eq, BinaryOpType::Eq, "eq"},
-OpTuple{at::ge, BinaryOpType::GE, "ge"},
-OpTuple{at::gt, BinaryOpType::GT, "gt"},
-OpTuple{at::le, BinaryOpType::LE, "le"},
-OpTuple{at::lt, BinaryOpType::LT, "lt"},
-OpTuple{at::ne, BinaryOpType::NE, "ne"}};
+{at::eq, BinaryOpType::Eq, "eq"},
+{at::ge, BinaryOpType::GE, "ge"},
+{at::gt, BinaryOpType::GT, "gt"},
+{at::le, BinaryOpType::LE, "le"},
+{at::lt, BinaryOpType::LT, "lt"},
+{at::ne, BinaryOpType::NE, "ne"}};

std::for_each(logic_ops.begin(), logic_ops.end(), [](OpTuple& op) {
test_op(
@@ -2709,18 +2704,17 @@
std::make_pair(ValType::TensorView, DataType::Float)));
});

-// see [Note: explicit tuple type for uniform initialization list]
std::vector<OpTuple> math_ops{
-OpTuple{at::atan2, BinaryOpType::Atan2, "atan2"},
-OpTuple{at::div, BinaryOpType::Div, "div"},
-OpTuple{at::fmod, BinaryOpType::Fmod, "fmod"},
-OpTuple{at::max, BinaryOpType::Max, "max"},
-OpTuple{at::min, BinaryOpType::Min, "min"},
-OpTuple{at::mul, BinaryOpType::Mul, "mul"},
-OpTuple{at::pow, BinaryOpType::Pow, "pow"},
+{at::atan2, BinaryOpType::Atan2, "atan2"},
+{at::div, BinaryOpType::Div, "div"},
+{at::fmod, BinaryOpType::Fmod, "fmod"},
+{at::max, BinaryOpType::Max, "max"},
+{at::min, BinaryOpType::Min, "min"},
+{at::mul, BinaryOpType::Mul, "mul"},
+{at::pow, BinaryOpType::Pow, "pow"},
// NOTE: Remainder does not match the Aten impl exactly
// despite using an identical function.
-OpTuple{at::remainder, BinaryOpType::Remainder, "remainder"},
+{at::remainder, BinaryOpType::Remainder, "remainder"},
};

std::for_each(math_ops.begin(), math_ops.end(), [](OpTuple& op) {
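A minimal standalone illustration of the test_gpu.cpp change (generic C++, nothing PyTorch-specific; it assumes a post-CUDA-9.2 toolchain in C++14/17 mode, which is exactly the point of the cleanup): with the old gcc 5.4 + CUDA 9.2 combination each element of the vector had to spell the tuple type explicitly, while newer compilers accept the plain braced form.

#include <string>
#include <tuple>
#include <vector>

using OpTuple = std::tuple<int (*)(int), int, std::string>;

int neg(int x) { return -x; }
int sq(int x) { return x * x; }

int main() {
  // Previously written as OpTuple{neg, 0, "neg"}, OpTuple{sq, 1, "sq"}.
  std::vector<OpTuple> ops{
      {neg, 0, "neg"},
      {sq, 1, "sq"},
  };
  return static_cast<int>(ops.size()) - 2;  // exits with 0
}
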
3 changes: 1 addition & 2 deletions torch/__init__.py
@@ -106,8 +106,7 @@
try:
ctypes.CDLL('vcruntime140.dll')
ctypes.CDLL('msvcp140.dll')
-if cuda_version not in ('9.2', '10.0'):
-    ctypes.CDLL('vcruntime140_1.dll')
+ctypes.CDLL('vcruntime140_1.dll')
except OSError:
print('''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe''')
4 changes: 2 additions & 2 deletions torch/autograd/profiler.py
@@ -654,8 +654,8 @@ def parse_nvprof_trace(path):
unique = EnforceUnique()
for row in conn.execute(kernel_query):
unique.see(row['marker_id'], row['runtime_id'])
-# 211 is cudaKernelLaunch for cuda >= 9.2; 13 is for older cuda versions
-assert (row['cbid'] == 211) or (row['cbid'] == 13)
+# 211 is cudaKernelLaunch for cuda >= 9.2
+assert (row['cbid'] == 211)
evt = functions_map[row['marker_id']]
evt.append_kernel(row['kernel_name'],
0,
15 changes: 4 additions & 11 deletions torch/csrc/api/include/torch/nn/cloneable.h
@@ -42,12 +42,8 @@
copy->buffers_.clear();
copy->children_.clear();
copy->reset();
-// [[this pointer note]]
-// Don't remove 'this' pointer, nvcc needs it to be explicitly given in some envs.
-// eg. ubuntu 16.04 + gcc 5.x + cuda 9.2
-// ubuntu 16.04 + gcc 7.x + cuda 9.2
TORCH_CHECK(
-copy->parameters_.size() == this->parameters_.size(),
+copy->parameters_.size() == parameters_.size(),
"The cloned module does not have the same number of "
"parameters as the original module after calling reset(). "
"Are you sure you called register_parameter() inside reset() "
@@ -58,9 +54,8 @@
tensor.to(*device) : autograd::Variable(tensor).clone();
copy->parameters_[parameter.key()].set_data(data);
}
-// Don't remove 'this' pointer. See [[this pointer note]]
TORCH_CHECK(
-copy->buffers_.size() == this->buffers_.size(),
+copy->buffers_.size() == buffers_.size(),
"The cloned module does not have the same number of "
"buffers as the original module after calling reset(). "
"Are you sure you called register_buffer() inside reset() "
@@ -71,15 +66,13 @@
tensor.to(*device) : autograd::Variable(tensor).clone();
copy->buffers_[buffer.key()].set_data(data);
}
-// Don't remove 'this' pointer. See [[this pointer note]]
TORCH_CHECK(
-copy->children_.size() == this->children_.size(),
+copy->children_.size() == children_.size(),
"The cloned module does not have the same number of "
"child modules as the original module after calling reset(). "
"Are you sure you called register_module() inside reset() "
"and not the constructor?");
-// Don't remove 'this' pointer. See [[this pointer note]]
-for (const auto& child : this->children_) {
+for (const auto& child : children_) {
copy->children_[child.key()]->clone_(*child.value(), device);
}
return copy;
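A small standalone illustration (generic C++, not taken from the PyTorch sources) of why the explicit this-> qualifications could be dropped in cloneable.h: for members inherited from a non-dependent base class, the qualified and unqualified spellings name the same member, and the qualification existed only to placate old nvcc/gcc combinations.

#include <cstddef>
#include <vector>

struct Module {
  std::vector<int> parameters_;
};

struct Net : Module {
  std::size_t count_qualified() const { return this->parameters_.size(); }
  std::size_t count_plain() const { return parameters_.size(); }  // same thing
};

int main() {
  Net net;
  net.parameters_ = {1, 2, 3};
  return net.count_plain() == net.count_qualified() ? 0 : 1;
}
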
