Tensor reinitialization codemod - 5/5 (pytorch#15884)
Summary:
Pull Request resolved: pytorch#15884

Codemod generated with clangr in shard mode, 25 files per diff.
To eliminate partially initialized Tensors, we split the initialization of local Tensor variables into two steps: first declare an uninitialized Tensor, then call `ReinitializeTensor` to initialize it.
Motivation: pytorch#12407
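
For reference, a minimal sketch of the before/after pattern this codemod applies, using the member name, `T_B` dtype, and `{M}` shape from the `fully_connected_fake_lowp_op` hunks below:

```cpp
// Before: the member is bound to a device at construction, but its dtype is
// only fixed later by the first mutable_data<T_B>() call, so the Tensor can
// sit around partially initialized.
Tensor bias_multiplier_{Context::GetDeviceType()};
// ... in the run method:
bias_multiplier_.Resize(M);

// After: declare the member uninitialized, then fully initialize it in a
// single call that sets shape, dtype, and device together.
Tensor bias_multiplier_;
// ... in the run method:
ReinitializeTensor(
    &bias_multiplier_,
    {M},
    at::dtype<T_B>().device(Context::GetDeviceType()));
```

With this pattern, the subsequent `mutable_data<T>()` call no longer determines the dtype; it only returns a pointer to already-allocated storage.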

Reviewed By: hyuen

Differential Revision: D13586737

fbshipit-source-id: dc8e49e9f29505b8898bb19f84c1a983f2d811ab
jerryzh168 authored and facebook-github-bot committed Jan 11, 2019
1 parent e46e572 commit 890568a
Showing 13 changed files with 82 additions and 57 deletions.
10 changes: 8 additions & 2 deletions caffe2/quantization/server/fully_connected_fake_lowp_op.cc
@@ -126,7 +126,10 @@ bool FullyConnectedFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
     // Add bias term
     if (bias_multiplier_.size() != M) {
       // If the helper bias multiplier is not M, reshape and fill it with one.
-      bias_multiplier_.Resize(M);
+      ReinitializeTensor(
+          &bias_multiplier_,
+          {M},
+          at::dtype<T_B>().device(Context::GetDeviceType()));
       math::Set<T_B, Context>(
           M,
           convert::To<float, T_B>(1),
@@ -245,7 +248,10 @@ bool FullyConnectedGradientFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
     if (bias_multiplier_.size() != M) {
       // If the helper bias multiplier is not M, reshape and fill it
       // with one.
-      bias_multiplier_.Resize(M);
+      ReinitializeTensor(
+          &bias_multiplier_,
+          {M},
+          at::dtype<T_B>().device(Context::GetDeviceType()));
       math::Set<T_B, Context>(
           M,
           convert::To<float, T_B>(1),
4 changes: 2 additions & 2 deletions caffe2/quantization/server/fully_connected_fake_lowp_op.h
@@ -81,7 +81,7 @@ class FullyConnectedFakeLowpFPOp final : public Operator<Context> {
   // A local vector to cache the output shape so we don't need to recreate
   // a vector object every time we run Run().
   vector<int64_t> Y_shape_cache_;
-  Tensor bias_multiplier_{Context::GetDeviceType()};
+  Tensor bias_multiplier_;

   bool float16_compute_;
 };
@@ -130,7 +130,7 @@ class FullyConnectedGradientFakeLowpFPOp : public Operator<Context> {
  protected:
   size_t axis_{1};
   size_t axis_w_{1};
-  Tensor bias_multiplier_{Context::GetDeviceType()};
+  Tensor bias_multiplier_;
   bool float16_compute_;
 };

8 changes: 4 additions & 4 deletions caffe2/sgd/lars_op.h
@@ -31,10 +31,10 @@ class LarsOp final : public Operator<Context> {

     auto* lr_rescaled = Output(0, vector<int64_t>{1}, at::dtype<T>());

-    X_norm_tensor_.Resize(1);
+    ReinitializeTensor(&X_norm_tensor_, {1}, at::dtype<T>().device(Context::GetDeviceType()));
     T* X_norm_ = X_norm_tensor_.template mutable_data<T>();

-    dX_norm_tensor_.Resize(1);
+    ReinitializeTensor(&dX_norm_tensor_, {1}, at::dtype<T>().device(Context::GetDeviceType()));
     T* dX_norm_ = dX_norm_tensor_.template mutable_data<T>();

     ComputeNorms(
@@ -84,8 +84,8 @@ class LarsOp final : public Operator<Context> {
   T offset_;
   T lr_min_;

-  Tensor X_norm_tensor_{Context::GetDeviceType()};
-  Tensor dX_norm_tensor_{Context::GetDeviceType()};
+  Tensor X_norm_tensor_;
+  Tensor dX_norm_tensor_;
 };

 } // namespace caffe2
12 changes: 6 additions & 6 deletions caffe2/sgd/yellowfin_op.h
@@ -180,8 +180,8 @@ for (int i = 0; i < param_tensor.dim(); ++i) {
     distance_avg_out_ = ++out_memory_it;

 #define CAFFE2_YF_INIT_VECTOR(NAME) \
-  NAME##_tensor_.Resize(D_); \
-  NAME##_ = NAME##_tensor_.template mutable_data<T>();
+  ReinitializeTensor(&NAME##_tensor_, {D_}, at::dtype<T>().device(Context::GetDeviceType())); \
+  NAME##_ = NAME##_tensor_.template mutable_data<T>();

     CAFFE2_YF_INIT_VECTOR(aux_vector)
     CAFFE2_YF_INIT_VECTOR(g_deb)
@@ -190,8 +190,8 @@ for (int i = 0; i < param_tensor.dim(); ++i) {
 #undef CAFFE2_YF_INIT_VECTOR

 #define CAFFE2_YF_INIT_SCALAR(NAME) \
-  NAME##_tensor_.Resize(1); \
-  NAME##_ = NAME##_tensor_.template mutable_data<T>();
+  ReinitializeTensor(&NAME##_tensor_, {1}, at::dtype<T>().device(Context::GetDeviceType())); \
+  NAME##_ = NAME##_tensor_.template mutable_data<T>();

     CAFFE2_YF_INIT_SCALAR(aux_scalar)
     CAFFE2_YF_INIT_SCALAR(distance)
@@ -229,8 +229,8 @@ for (int i = 0; i < param_tensor.dim(); ++i) {
   int D_;

   // Temporary memory on device, listed all variables used in calculations
-#define CAFFE2_YF_DEFINE_TENSOR(NAME) \
-  Tensor NAME##_tensor_{Context::GetDeviceType()}; \
+#define CAFFE2_YF_DEFINE_TENSOR(NAME) \
+  Tensor NAME##_tensor_; \
   T* NAME##_;

   CAFFE2_YF_DEFINE_TENSOR(aux_vector)
1 change: 1 addition & 0 deletions caffe2/utils/math.h
@@ -19,6 +19,7 @@ extern "C" {

 namespace caffe2 {

+// TODO: Change dims related arguments to int64_t?
 class Tensor;

 // An empty class as a placeholder for a math function that has no specific
67 changes: 43 additions & 24 deletions caffe2/utils/math_test.cc
@@ -171,9 +171,12 @@ class GemmBatchedTest
  protected:
   void SetUp() override {
     cpu_context_ = make_unique<CPUContext>(option_);
-    X_.Resize(std::vector<int64_t>{3, 5, 10});
-    W_.Resize(std::vector<int64_t>{3, 6, 10});
-    Y_.Resize(std::vector<int64_t>{3, 5, 6});
+    ReinitializeTensor(
+        &X_, std::vector<int64_t>{3, 5, 10}, at::dtype<float>().device(CPU));
+    ReinitializeTensor(
+        &W_, std::vector<int64_t>{3, 6, 10}, at::dtype<float>().device(CPU));
+    ReinitializeTensor(
+        &Y_, std::vector<int64_t>{3, 5, 6}, at::dtype<float>().device(CPU));
     math::Set<float, CPUContext>(
         X_.numel(), 1, X_.mutable_data<float>(), cpu_context_.get());
     math::Set<float, CPUContext>(
@@ -243,9 +246,9 @@ class GemmBatchedTest

   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;
-  Tensor X_{CPU};
-  Tensor W_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor W_;
+  Tensor Y_;
   bool trans_X_;
   bool trans_W_;
 };
@@ -440,8 +443,12 @@ class ReduceTensorTest : public testing::Test {
     for (const int axis : axes) {
       Y_dims[axis] = 1;
     }
-    X_.Resize(X_dims);
-    Y_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -462,8 +469,8 @@ class ReduceTensorTest : public testing::Test {

   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;
-  Tensor X_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor Y_;
 };

 TEST_F(ReduceTensorTest, ReduceMinTest) {
@@ -677,8 +684,12 @@ class BroadcastTest : public testing::Test {
       const std::vector<int>& Y_dims,
       const std::vector<float>& X_data,
       const std::vector<float>& Y_data) {
-    X_.Resize(X_dims);
-    Y_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -700,8 +711,8 @@ class BroadcastTest : public testing::Test {
   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;

-  Tensor X_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor Y_;
 };

 TEST_F(BroadcastTest, BroadcastFloatTest) {
@@ -748,9 +759,13 @@ class MomentsTest : public testing::Test {
     for (const int axis : axes) {
       Y_dims[axis] = 1;
     }
-    X_.Resize(X_dims);
-    mean_.Resize(Y_dims);
-    variance_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&mean_, Y_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&variance_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -776,9 +791,9 @@ class MomentsTest : public testing::Test {
   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;

-  Tensor X_{CPU};
-  Tensor mean_{CPU};
-  Tensor variance_{CPU};
+  Tensor X_;
+  Tensor mean_;
+  Tensor variance_;
 };

 TEST_F(MomentsTest, MomentsFloatTest) {
@@ -842,8 +857,12 @@ class TransposeTest : public testing::Test {
     for (int i = 0; i < ndim; ++i) {
       Y_dims[i] = X_dims[axes[i]];
     }
-    X_.Resize(X_dims);
-    Y_.Resize(Y_dims);
+    std::vector<int64_t> X_dims_64;
+    std::vector<int64_t> Y_dims_64;
+    std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64));
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU));
+    ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU));
     ASSERT_EQ(X_data.size(), X_.numel());
     cpu_context_->CopyFromCPU<float>(
         X_data.size(), X_data.data(), X_.mutable_data<float>());
@@ -863,8 +882,8 @@ class TransposeTest : public testing::Test {
   DeviceOption option_;
   std::unique_ptr<CPUContext> cpu_context_;

-  Tensor X_{CPU};
-  Tensor Y_{CPU};
+  Tensor X_;
+  Tensor Y_;
 };

 TEST_F(TransposeTest, TransposeFloatTest) {
17 changes: 8 additions & 9 deletions caffe2/video/video_input_op.h
@@ -51,10 +51,10 @@ class VideoInputOp final : public PrefetchOperator<Context> {
       std::bernoulli_distribution* mirror_this_clip);

   const db::DBReader* reader_;
-  Tensor prefetched_clip_rgb_{CPU};
-  Tensor prefetched_clip_of_{CPU};
-  Tensor prefetched_label_{CPU};
-  Tensor prefetched_video_id_{CPU};
+  Tensor prefetched_clip_rgb_;
+  Tensor prefetched_clip_of_;
+  Tensor prefetched_label_;
+  Tensor prefetched_video_id_;
   Tensor prefetched_clip_rgb_on_device_{Context::GetDeviceType()};
   Tensor prefetched_clip_of_on_device_{Context::GetDeviceType()};
   Tensor prefetched_label_on_device_{Context::GetDeviceType()};
@@ -470,26 +470,25 @@ VideoInputOp<Context>::VideoInputOp(
   data_shape[2] = length_rgb_;
   data_shape[3] = crop_height_;
   data_shape[4] = crop_width_;
-  prefetched_clip_rgb_.Resize(data_shape);
+  ReinitializeTensor(&prefetched_clip_rgb_, data_shape, at::dtype<float>().device(CPU));

   // for optical flow data
   data_shape[1] = channels_of_;
   data_shape[2] = length_of_;
-  prefetched_clip_of_.Resize(data_shape);
+  ReinitializeTensor(&prefetched_clip_of_, data_shape, at::dtype<float>().device(CPU));

   // If do_multi_label is used, output label is a binary vector
   // of length num_of_class indicating which labels present
   if (do_multi_label_) {
     label_shape[0] = batch_size_ * clip_per_video_ * multi_crop_count_;
     label_shape[1] = num_of_class_;
-    prefetched_label_.Resize(label_shape);
+    ReinitializeTensor(&prefetched_label_, label_shape, at::dtype<int>().device(CPU));
   } else {
     prefetched_label_.Resize(
         vector<int64_t>(1, batch_size_ * clip_per_video_ * multi_crop_count_));
   }

-  prefetched_video_id_.Resize(
-      vector<int64_t>(1, batch_size_ * clip_per_video_ * multi_crop_count_));
+  ReinitializeTensor(&prefetched_video_id_, vector<int64_t>(1, batch_size_ * clip_per_video_ * multi_crop_count_), at::dtype<int>().device(CPU));
 }

 template <class Context>
2 changes: 1 addition & 1 deletion modules/detectron/group_spatial_softmax_op.cu
@@ -134,7 +134,7 @@ bool GroupSpatialSoftmaxGradientOp<float, CUDAContext>::RunOnDevice() {
   dX->ResizeLike(Y);

   if (sum_probs_.size() != N * A * H * W) {
-    sum_probs_.Resize(N * A * H * W);
+    ReinitializeTensor(&sum_probs_, {N * A * H * W}, at::dtype<float>().device(CUDA));
   }

   const float* Ydata = Y.data<float>();
2 changes: 1 addition & 1 deletion modules/detectron/group_spatial_softmax_op.h
@@ -68,7 +68,7 @@ class GroupSpatialSoftmaxGradientOp final : public Operator<Context> {
  protected:
   int num_classes_;
   StorageOrder order_;
-  Tensor sum_probs_{Context::GetDeviceType()};
+  Tensor sum_probs_;
 };

 } // namespace caffe2
4 changes: 2 additions & 2 deletions modules/detectron/sigmoid_cross_entropy_loss_op.cu
@@ -82,7 +82,7 @@ bool SigmoidCrossEntropyLossOp<float, CUDAContext>::RunOnDevice() {
   auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
   counts_.ResizeLike(X);
   losses_.ResizeLike(X);
-  normalizer_.Resize(vector<int64_t>());
+  ReinitializeTensor(&normalizer_, vector<int64_t>(), at::dtype<float>().device(CUDA));
   SigmoidCrossEntropyLossKernel<<<
       CAFFE_GET_BLOCKS(X.size()),
       CAFFE_CUDA_NUM_THREADS,
@@ -124,7 +124,7 @@ bool SigmoidCrossEntropyLossGradientOp<float, CUDAContext>::RunOnDevice() {
   dX->ResizeLike(X);
   counts_.ResizeLike(X);
-  normalizer_.Resize(vector<int64_t>());
+  ReinitializeTensor(&normalizer_, vector<int64_t>(), at::dtype<float>().device(CUDA));
   SigmoidCrossEntropyLossGradientKernel<<<
       CAFFE_GET_BLOCKS(X.size()),
       CAFFE_CUDA_NUM_THREADS,
4 changes: 2 additions & 2 deletions modules/detectron/sigmoid_cross_entropy_loss_op.h
@@ -46,7 +46,7 @@ class SigmoidCrossEntropyLossOp final : public Operator<Context> {
   int normalize_;
   Tensor losses_{Context::GetDeviceType()};
   Tensor counts_{Context::GetDeviceType()};
-  Tensor normalizer_{Context::GetDeviceType()};
+  Tensor normalizer_;
 };

 template <typename T, class Context>
@@ -70,7 +70,7 @@ class SigmoidCrossEntropyLossGradientOp final : public Operator<Context> {
   float scale_;
   int normalize_;
   Tensor counts_{Context::GetDeviceType()};
-  Tensor normalizer_{Context::GetDeviceType()};
+  Tensor normalizer_;
 };

 } // namespace caffe2
4 changes: 2 additions & 2 deletions modules/detectron/softmax_focal_loss_op.cu
@@ -156,7 +156,7 @@ bool SoftmaxFocalLossOp<float, CUDAContext>::RunOnDevice() {
   int W = X.dim32(3);
   int A = D / num_classes_;

-  losses_.Resize(N * A * H * W);
+  ReinitializeTensor(&losses_, {N * A * H * W}, at::dtype<float>().device(CUDA));
   auto* P = Output(1, {N * D * H * W}, at::dtype<float>());
   auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
   math::Set<float, CUDAContext>(
@@ -212,7 +212,7 @@ bool SoftmaxFocalLossGradientOp<float, CUDAContext>::RunOnDevice() {
   int W = X.dim32(3);
   int A = D / num_classes_;

-  buff_.Resize(N * A * H * W);
+  ReinitializeTensor(&buff_, {N * A * H * W}, at::dtype<float>().device(CUDA));

   dX->ResizeLike(X);

4 changes: 2 additions & 2 deletions modules/detectron/softmax_focal_loss_op.h
@@ -52,7 +52,7 @@ class SoftmaxFocalLossOp final : public Operator<Context> {
   float alpha_;
   int num_classes_;
   StorageOrder order_;
-  Tensor losses_{Context::GetDeviceType()};
+  Tensor losses_;
 };

 template <typename T, class Context>
@@ -83,7 +83,7 @@ class SoftmaxFocalLossGradientOp final : public Operator<Context> {
   float alpha_;
   int num_classes_;
   StorageOrder order_;
-  Tensor buff_{Context::GetDeviceType()};
+  Tensor buff_;
 };

 } // namespace caffe2
