Tensor reinitialization codemod - 5/5 (pytorch#15884)
Summary:
Pull Request resolved: pytorch#15884

Codemod generated with clangr in shard mode, 25 files per diff.
To eliminate partially initialized Tensors, we split the initialization of local Tensor variables into two steps: first declare an uninitialized Tensor, then call `ReinitializeTensor` to initialize it.
Motivation: pytorch#12407
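
For reference, a minimal sketch of the before/after pattern this codemod applies, using the member name, `T_B` dtype, and `{M}` shape from the `fully_connected_fake_lowp_op` hunks below:

```cpp
// Before: the member is bound to a device at construction, but its dtype is
// only fixed later by the first mutable_data<T_B>() call, so the Tensor can
// sit around partially initialized.
Tensor bias_multiplier_{Context::GetDeviceType()};
// ... in the run method:
bias_multiplier_.Resize(M);

// After: declare the member uninitialized, then fully initialize it in a
// single call that sets shape, dtype, and device together.
Tensor bias_multiplier_;
// ... in the run method:
ReinitializeTensor(
    &bias_multiplier_,
    {M},
    at::dtype<T_B>().device(Context::GetDeviceType()));
```

With this pattern, the subsequent `mutable_data<T>()` call no longer determines the dtype; it only returns a pointer to already-allocated storage.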

Reviewed By: hyuen

Differential Revision: D13586737

fbshipit-source-id: dc8e49e9f29505b8898bb19f84c1a983f2d811ab
jerryzh168 authored and facebook-github-bot committed Jan 11, 2019
1 parent e46e572 commit 890568a
Showing 13 changed files with 82 additions and 57 deletions.
10 changes: 8 additions & 2 deletions caffe2/quantization/server/fully_connected_fake_lowp_op.cc
@@ -126,7 +126,10 @@ bool FullyConnectedFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
     // Add bias term
     if (bias_multiplier_.size() != M) {
       // If the helper bias multiplier is not M, reshape and fill it with one.
-      bias_multiplier_.Resize(M);
+      ReinitializeTensor(
+          &bias_multiplier_,
+          {M},
+          at::dtype<T_B>().device(Context::GetDeviceType()));
       math::Set<T_B, Context>(
           M,
           convert::To<float, T_B>(1),
@@ -245,7 +248,10 @@ bool FullyConnectedGradientFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
     if (bias_multiplier_.size() != M) {
       // If the helper bias multiplier is not M, reshape and fill it
       // with one.
-      bias_multiplier_.Resize(M);
+      ReinitializeTensor(
+          &bias_multiplier_,
+          {M},
+          at::dtype<T_B>().device(Context::GetDeviceType()));
       math::Set<T_B, Context>(
           M,
           convert::To<float, T_B>(1),
4 changes: 2 additions & 2 deletions caffe2/quantization/server/fully_connected_fake_lowp_op.h
@@ -81,7 +81,7 @@ class FullyConnectedFakeLowpFPOp final : public Operator<Context> {
   // A local vector to cache the output shape so we don't need to recreate
   // a vector object every time we run Run().
   vector<int64_t> Y_shape_cache_;
-  Tensor bias_multiplier_{Context::GetDeviceType()};
+  Tensor bias_multiplier_;

   bool float16_compute_;
 };
@@ -130,7 +130,7 @@ class FullyConnectedGradientFakeLowpFPOp : public Operator<Context> {
  protected:
   size_t axis_{1};
   size_t axis_w_{1};
-  Tensor bias_multiplier_{Context::GetDeviceType()};
+  Tensor bias_multiplier_;
   bool float16_compute_;
 };

8 changes: 4 additions & 4 deletions caffe2/sgd/lars_op.h
@@ -31,10 +31,10 @@ class LarsOp final : public Operator<Context> {

     auto* lr_rescaled = Output(0, vector<int64_t>{1}, at::dtype<T>());

-    X_norm_tensor_.Resize(1);
+    ReinitializeTensor(&X_norm_tensor_, {1}, at::dtype<T>().device(Context::GetDeviceType()));
     T* X_norm_ = X_norm_tensor_.template mutable_data<T>();

-    dX_norm_tensor_.Resize(1);
+    ReinitializeTensor(&dX_norm_tensor_, {1}, at::dtype<T>().device(Context::GetDeviceType()));
     T* dX_norm_ = dX_norm_tensor_.template mutable_data<T>();

     ComputeNorms(
@@ -84,8 +84,8 @@ class LarsOp final : public Operator<Context> {
   T offset_;
   T lr_min_;

-  Tensor X_norm_tensor_{Context::GetDeviceType()};
-  Tensor dX_norm_tensor_{Context::GetDeviceType()};
+  Tensor X_norm_tensor_;
+  Tensor dX_norm_tensor_;
 };

 } // namespace caffe2
12 changes: 6 additions & 6 deletions caffe2/sgd/yellowfin_op.h
@@ -180,8 +180,8 @@ for (int i = 0; i < param_tensor.dim(); ++i) {
     distance_avg_out_ = ++out_memory_it;

 #define CAFFE2_YF_INIT_VECTOR(NAME) \
-  NAME##_tensor_.Resize(D_); \
-  NAME##_ = NAME##_tensor_.template mutable_data<T>();
+  ReinitializeTensor(&NAME##_tensor_, {D_}, at::dtype<T>().device(Context::GetDeviceType())); \
+  NAME##_ = NAME##_tensor_.template mutable_data<T>();

     CAFFE2_YF_INIT_VECTOR(aux_vector)
     CAFFE2_YF_INIT_VECTOR(g_deb)
@@ -190,8 +190,8 @@ for (int i = 0; i < param_tensor.dim(); ++i) {
 #undef CAFFE2_YF_INIT_VECTOR

 #define CAFFE2_YF_INIT_SCALAR(NAME) \
-  NAME##_tensor_.Resize(1); \
-  NAME##_ = NAME##_tensor_.template mutable_data<T>();
+  ReinitializeTensor(&NAME##_tensor_, {1}, at::dtype<T>().device(Context::GetDeviceType())); \
+  NAME##_ = NAME##_tensor_.template mutable_data<T>();

     CAFFE2_YF_INIT_SCALAR(aux_scalar)
     CAFFE2_YF_INIT_SCALAR(distance)
@@ -229,8 +229,8 @@ for (int i = 0; i < param_tensor.dim(); ++i) {
   int D_;

   // Temporary memory on device, listed all variables used in calculations
-#define CAFFE2_YF_DEFINE_TENSOR(NAME) \
-  Tensor NAME##_tensor_{Context::GetDeviceType()}; \
+#define CAFFE2_YF_DEFINE_TENSOR(NAME) \
+  Tensor NAME##_tensor_; \
   T* NAME##_;

   CAFFE2_YF_DEFINE_TENSOR(aux_vector)
1 change: 1 addition & 0 deletions caffe2/utils/math.h
@@ -19,6 +19,7 @@ extern "C" {

 namespace caffe2 {

+// TODO: Change dims related arguments to int64_t?
 class Tensor;

 // An empty class as a placeholder for a math function that has no specific
67 changes: 43 additions & 24 deletions caffe2/utils/math_test.cc
@@ -171,9 +171,12 @@ class GemmBatchedTest
  protected:
   void SetUp() override {
     cpu_context_ = make_unique<CPUContext>(option_);
-    X_.Resize(std::vector<int64_t>{3, 5, 10});
-    W_.Resize(std::vector<int64_t>{3, 6, 10});
-    Y_.Resize(std::vector<int64_t>{3, 5, 6});
+    ReinitializeTensor(
+        &X_, std::vector<int64_t>{3, 5, 10}, at::dtype<float>().device(CPU));
+    ReinitializeTensor(
+        &W_, std::vector<int64_t>{3, 6, 10}, at::dtype<float>().device(CPU));
+    ReinitializeTensor(
+        &Y_, std::vector<int64_t>{3, 5, 6}, at::dtype<float>().device(CPU));
     math::Set<float, CPUContext>(
         X_.numel(), 1, X_.mutable_data<float>(), cpu_context_.get());
     math::Set<float, CPUContext>(
@@ -243,9 +246,9 @@ class GemmBatchedTest

   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;
-  Tensor X_{CPU};
-  Tensor W_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor W_;
+  Tensor Y_;
   bool trans_X_;
   bool trans_W_;
 };
@@ -440,8 +443,12 @@ class ReduceTensorTest : public testing::Test {
     for (const int axis : axes) {
       Y_dims[axis] = 1;
     }
-    X_.Resize(X_dims);
-    Y_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -462,8 +469,8 @@ class ReduceTensorTest : public testing::Test {

   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;
-  Tensor X_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor Y_;
 };

 TEST_F(ReduceTensorTest, ReduceMinTest) {
@@ -677,8 +684,12 @@ class BroadcastTest : public testing::Test {
       const std::vector<int>& Y_dims,
       const std::vector<float>& X_data,
       const std::vector<float>& Y_data) {
-    X_.Resize(X_dims);
-    Y_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -700,8 +711,8 @@ class BroadcastTest : public testing::Test {
   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;

-  Tensor X_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor Y_;
 };

 TEST_F(BroadcastTest, BroadcastFloatTest) {
@@ -748,9 +759,13 @@ class MomentsTest : public testing::Test {
     for (const int axis : axes) {
       Y_dims[axis] = 1;
     }
-    X_.Resize(X_dims);
-    mean_.Resize(Y_dims);
-    variance_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&mean_, Y_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&variance_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -776,9 +791,9 @@ class MomentsTest : public testing::Test {
   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;

-  Tensor X_{CPU};
-  Tensor mean_{CPU};
-  Tensor variance_{CPU};
+  Tensor X_;
+  Tensor mean_;
+  Tensor variance_;
 };

 TEST_F(MomentsTest, MomentsFloatTest) {
@@ -842,8 +857,12 @@ class TransposeTest : public testing::Test {
     for (int i = 0; i < ndim; ++i) {
       Y_dims[i] = X_dims[axes[i]];
     }
-    X_.Resize(X_dims);
-    Y_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -863,8 +882,8 @@ class TransposeTest : public testing::Test {
   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;

-  Tensor X_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor Y_;
 };

 TEST_F(TransposeTest, TransposeFloatTest) {
17 changes: 8 additions & 9 deletions caffe2/video/video_input_op.h
@@ -51,10 +51,10 @@ class VideoInputOp final : public PrefetchOperator<Context> {
       std::bernoulli_distribution* mirror_this_clip);

   const db::DBReader* reader_;
-  Tensor prefetched_clip_rgb_{CPU};
-  Tensor prefetched_clip_of_{CPU};
-  Tensor prefetched_label_{CPU};
-  Tensor prefetched_video_id_{CPU};
+  Tensor prefetched_clip_rgb_;
+  Tensor prefetched_clip_of_;
+  Tensor prefetched_label_;
+  Tensor prefetched_video_id_;
   Tensor prefetched_clip_rgb_on_device_{Context::GetDeviceType()};
   Tensor prefetched_clip_of_on_device_{Context::GetDeviceType()};
   Tensor prefetched_label_on_device_{Context::GetDeviceType()};
@@ -470,26 +470,25 @@ VideoInputOp<Context>::VideoInputOp(
   data_shape[2] = length_rgb_;
   data_shape[3] = crop_height_;
   data_shape[4] = crop_width_;
-  prefetched_clip_rgb_.Resize(data_shape);
+  ReinitializeTensor(&prefetched_clip_rgb_, data_shape, at::dtype<float>().device(CPU));

   // for optical flow data
   data_shape[1] = channels_of_;
   data_shape[2] = length_of_;
-  prefetched_clip_of_.Resize(data_shape);
+  ReinitializeTensor(&prefetched_clip_of_, data_shape, at::dtype<float>().device(CPU));

   // If do_multi_label is used, output label is a binary vector
   // of length num_of_class indicating which labels present
   if (do_multi_label_) {
     label_shape[0] = batch_size_ * clip_per_video_ * multi_crop_count_;
     label_shape[1] = num_of_class_;
-    prefetched_label_.Resize(label_shape);
+    ReinitializeTensor(&prefetched_label_, label_shape, at::dtype<int>().device(CPU));
   } else {
     prefetched_label_.Resize(
         vector<int64_t>(1, batch_size_ * clip_per_video_ * multi_crop_count_));
   }

-  prefetched_video_id_.Resize(
-      vector<int64_t>(1, batch_size_ * clip_per_video_ * multi_crop_count_));
+  ReinitializeTensor(&prefetched_video_id_, vector<int64_t>(1, batch_size_ * clip_per_video_ * multi_crop_count_), at::dtype<int>().device(CPU));
 }

 template <class Context>
2 changes: 1 addition & 1 deletion modules/detectron/group_spatial_softmax_op.cu
@@ -134,7 +134,7 @@ bool GroupSpatialSoftmaxGradientOp<float, CUDAContext>::RunOnDevice() {
   dX->ResizeLike(Y);

   if (sum_probs_.size() != N * A * H * W) {
-    sum_probs_.Resize(N * A * H * W);
+    ReinitializeTensor(&sum_probs_, {N * A * H * W}, at::dtype<float>().device(CUDA));
   }

   const float* Ydata = Y.data<float>();
2 changes: 1 addition & 1 deletion modules/detectron/group_spatial_softmax_op.h
@@ -68,7 +68,7 @@ class GroupSpatialSoftmaxGradientOp final : public Operator<Context> {
  protected:
   int num_classes_;
   StorageOrder order_;
-  Tensor sum_probs_{Context::GetDeviceType()};
+  Tensor sum_probs_;
 };

 } // namespace caffe2
4 changes: 2 additions & 2 deletions modules/detectron/sigmoid_cross_entropy_loss_op.cu
@@ -82,7 +82,7 @@ bool SigmoidCrossEntropyLossOp<float, CUDAContext>::RunOnDevice() {
   auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
   counts_.ResizeLike(X);
   losses_.ResizeLike(X);
-  normalizer_.Resize(vector<int64_t>());
+  ReinitializeTensor(&normalizer_, vector<int64_t>(), at::dtype<float>().device(CUDA));
   SigmoidCrossEntropyLossKernel<<<
       CAFFE_GET_BLOCKS(X.size()),
       CAFFE_CUDA_NUM_THREADS,
@@ -124,7 +124,7 @@ bool SigmoidCrossEntropyLossGradientOp<float, CUDAContext>::RunOnDevice() {
   dX->ResizeLike(X);
   counts_.ResizeLike(X);
-  normalizer_.Resize(vector<int64_t>());
+  ReinitializeTensor(&normalizer_, vector<int64_t>(), at::dtype<float>().device(CUDA));
   SigmoidCrossEntropyLossGradientKernel<<<
       CAFFE_GET_BLOCKS(X.size()),
       CAFFE_CUDA_NUM_THREADS,
4 changes: 2 additions & 2 deletions modules/detectron/sigmoid_cross_entropy_loss_op.h
@@ -46,7 +46,7 @@ class SigmoidCrossEntropyLossOp final : public Operator<Context> {
   int normalize_;
   Tensor losses_{Context::GetDeviceType()};
   Tensor counts_{Context::GetDeviceType()};
-  Tensor normalizer_{Context::GetDeviceType()};
+  Tensor normalizer_;
 };

 template <typename T, class Context>
@@ -70,7 +70,7 @@ class SigmoidCrossEntropyLossGradientOp final : public Operator<Context> {
   float scale_;
   int normalize_;
   Tensor counts_{Context::GetDeviceType()};
-  Tensor normalizer_{Context::GetDeviceType()};
+  Tensor normalizer_;
 };

 } // namespace caffe2
4 changes: 2 additions & 2 deletions modules/detectron/softmax_focal_loss_op.cu
@@ -156,7 +156,7 @@ bool SoftmaxFocalLossOp<float, CUDAContext>::RunOnDevice() {
   int W = X.dim32(3);
   int A = D / num_classes_;

-  losses_.Resize(N * A * H * W);
+  ReinitializeTensor(&losses_, {N * A * H * W}, at::dtype<float>().device(CUDA));
   auto* P = Output(1, {N * D * H * W}, at::dtype<float>());
   auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
   math::Set<float, CUDAContext>(
@@ -212,7 +212,7 @@ bool SoftmaxFocalLossGradientOp<float, CUDAContext>::RunOnDevice() {
   int W = X.dim32(3);
   int A = D / num_classes_;

-  buff_.Resize(N * A * H * W);
+  ReinitializeTensor(&buff_, {N * A * H * W}, at::dtype<float>().device(CUDA));

   dX->ResizeLike(X);

4 changes: 2 additions & 2 deletions modules/detectron/softmax_focal_loss_op.h
@@ -52,7 +52,7 @@ class SoftmaxFocalLossOp final : public Operator<Context> {
   float alpha_;
   int num_classes_;
   StorageOrder order_;
-  Tensor losses_{Context::GetDeviceType()};
+  Tensor losses_;
 };

 template <typename T, class Context>
@@ -83,7 +83,7 @@ class SoftmaxFocalLossGradientOp final : public Operator<Context> {
   float alpha_;
   int num_classes_;
   StorageOrder order_;
-  Tensor buff_{Context::GetDeviceType()};
+  Tensor buff_;
 };

 } // namespace caffe2
