[pytorch][nnc] support custom class parameters (#59466)
Summary:
Pull Request resolved: #59466

Change the saved parameter type from `at::Tensor` to `at::IValue` to support
custom class parameters, e.g. `__torch__.torch.classes.xnnpack.Conv2dOpContext`.

The NNC-produced kernels won't deal with custom class parameters directly.
They simply pass them through to the external operators that take these custom
class parameters, e.g. `prepacked::conv2d_clamp_run`.

The runtime reuses the `__getstate__` and `__setstate__` methods on the custom
class to persist and restore the state of the parameters.
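
As an illustration, here is a minimal sketch of how a custom class exposes these pickle methods. The `MyOpContext` name and namespace are hypothetical; the pattern mirrors the `FakeTensor` test class added in this diff:

```cpp
#include <torch/custom_class.h>
#include <vector>

// Hypothetical custom class whose persistent state is a vector of ints.
struct MyOpContext : torch::CustomClassHolder {
  explicit MyOpContext(std::vector<int64_t> state) : state_(std::move(state)) {}
  std::vector<int64_t> state_;
};

TORCH_LIBRARY(my_namespace, m) {
  m.class_<MyOpContext>("MyOpContext")
      .def(torch::init<std::vector<int64_t>>())
      .def_pickle(
          // __getstate__: extract the state that gets persisted.
          [](c10::intrusive_ptr<MyOpContext> self) { return self->state_; },
          // __setstate__: rebuild the object when the model is loaded.
          [](std::vector<int64_t> state) {
            return c10::make_intrusive<MyOpContext>(std::move(state));
          });
}
```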

When calling into the kernel, the runtime passes the untyped raw pointers of
the custom class objects as `void*`. This mirrors regular tensor parameters,
for which it passes the raw data pointer of the tensor storage. The generated
kernel needs to hardcode the expected type for each parameter and cast it back
before calling the external ops.
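
A minimal sketch of this calling convention, reusing the hypothetical `MyOpContext` class from the sketch above (`run_external_op` is likewise hypothetical, standing in for something like `prepacked::conv2d_clamp_run`; the real test kernel `fake_tensor_add_kernel` in this diff follows the same pattern):

```cpp
#include <ATen/ATen.h>

// Hypothetical external op that consumes the custom class.
at::Tensor run_external_op(const at::Tensor& input, MyOpContext* ctx) {
  return at::add(input, ctx->state_[0]);
}

// Hypothetical generated kernel. args[] holds the raw pointers supplied by
// the runtime: tensor storage pointers for tensor parameters, object
// pointers for custom class parameters. The expected type of each slot is
// hardcoded at codegen time.
int my_generated_kernel(void** args) {
  at::Tensor input = at::from_blob(args[0], {4, 4}, at::kFloat);
  at::Tensor out = at::from_blob(args[1], {4, 4}, at::kFloat);
  // Cast the untyped pointer back to the expected custom class type
  // before passing it through to the external op.
  auto* ctx = reinterpret_cast<MyOpContext*>(args[2]);
  out.copy_(run_external_op(input, ctx));
  return 0;
}
```
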
ghstack-source-id: 131897904

Test Plan: - unit tests

Reviewed By: kimishpatel

Differential Revision: D28902496

fbshipit-source-id: 4b2c0895dd28f0b7d344aa08183d42ad6a355dae
ljk53 authored and facebook-github-bot committed Jun 19, 2021
1 parent cac9ae1 commit 5824a86
Showing 11 changed files with 275 additions and 27 deletions.
1 change: 0 additions & 1 deletion .jenkins/pytorch/macos-lite-interpreter-build-test.sh
@@ -28,7 +28,6 @@ if [ "${BUILD_LITE_INTERPRETER}" == 1 ]; then
popd || exit

"${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
"${CPP_BUILD}/caffe2/build/bin/test_mobile_nnc"

# Change the permission manually from 755 to 644 to keep git clean
chmod 644 "${HOME}/project/.jenkins/pytorch/macos-lite-interpreter-build-test.sh"
1 change: 0 additions & 1 deletion .jenkins/pytorch/macos-test.sh
@@ -159,7 +159,6 @@ test_jit_hooks() {
assert_git_not_dirty
}


if [ -z "${BUILD_ENVIRONMENT}" ] || [[ "${BUILD_ENVIRONMENT}" == *-test ]]; then
test_python_all
test_libtorch
1 change: 1 addition & 0 deletions .jenkins/pytorch/test.sh
@@ -236,6 +236,7 @@ test_libtorch() {
wait
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
build/bin/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
build/bin/test_mobile_nnc --gtest_output=xml:$TEST_REPORTS_DIR/test_mobile_nnc.xml
assert_git_not_dirty
fi
}
11 changes: 5 additions & 6 deletions caffe2/CMakeLists.txt
@@ -527,8 +527,6 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
${TORCH_SRC_DIR}/csrc/jit/mobile/module.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/observer.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/interpreter.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/nnc/context.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/nnc/registry.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/train/export_data.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/train/optim/sgd.cpp
${TORCH_SRC_DIR}/csrc/jit/mobile/train/random.cpp
@@ -1039,10 +1037,6 @@ endif()
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime
${CMAKE_BINARY_DIR}/test_lite_interpreter_runtime
)
add_subdirectory(
${TORCH_ROOT}/test/mobile/nnc
${CMAKE_BINARY_DIR}/test_mobile_nnc
)
else()
add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit)
add_subdirectory(
@@ -1052,6 +1046,11 @@ endif()
if(USE_DISTRIBUTED AND NOT WIN32)
add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc)
endif()

add_subdirectory(
${TORCH_ROOT}/test/mobile/nnc
${CMAKE_BINARY_DIR}/test_mobile_nnc
)
endif()
endif()

4 changes: 3 additions & 1 deletion test/mobile/nnc/CMakeLists.txt
@@ -2,12 +2,14 @@ set(MOBILE_NNC_TEST_ROOT ${TORCH_ROOT}/test/mobile/nnc)

set(MOBILE_NNC_TEST_SRCS
${MOBILE_NNC_TEST_ROOT}/test_context.cpp
${MOBILE_NNC_TEST_ROOT}/test_nnc_backend.cpp
${MOBILE_NNC_TEST_ROOT}/test_registry.cpp
)

add_executable(test_mobile_nnc
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
${MOBILE_NNC_TEST_SRCS})
${MOBILE_NNC_TEST_SRCS}
)

target_link_libraries(test_mobile_nnc PRIVATE torch gtest)
target_include_directories(test_mobile_nnc PRIVATE ${ATen_CPU_INCLUDE})
19 changes: 11 additions & 8 deletions test/mobile/nnc/test_context.cpp
@@ -63,8 +63,10 @@ TEST(Function, ExecuteSlowMul) {

f.set_nnc_kernel_id("slow_mul");
f.set_input_specs({create_test_input_spec({size})});
f.set_output_spec({create_test_output_spec({size})});
f.set_parameters({at::ones({1}, at::kInt).mul(n)});
f.set_output_specs({create_test_output_spec({size})});
f.set_parameters(c10::impl::toList(c10::List<at::Tensor>({
at::ones({1}, at::kInt).mul(n)
})));
f.set_memory_plan(create_test_memory_plan({sizeof(float) * size}));

c10::List<at::Tensor> input({
@@ -81,12 +83,13 @@ TEST(Function, Serialization) {
f.set_name("test_function");
f.set_nnc_kernel_id("test_kernel");
f.set_input_specs({create_test_input_spec({1, 3, 224, 224})});
f.set_output_spec({create_test_output_spec({1000})});
f.set_parameters({
f.set_output_specs({create_test_output_spec({1000})});

f.set_parameters(c10::impl::toList(c10::List<at::Tensor>({
at::ones({1, 16, 3, 3}, at::kFloat),
at::ones({16, 32, 1, 1}, at::kFloat),
at::ones({32, 1, 3, 3}, at::kFloat)
});
})));
f.set_memory_plan(create_test_memory_plan({
sizeof(float) * 1024,
sizeof(float) * 2048,
@@ -105,9 +108,9 @@ TEST(Function, Serialization) {
EXPECT_EQ(f2.output_specs()[0].dtype_, at::kFloat);

EXPECT_EQ(f2.parameters().size(), 3);
EXPECT_EQ(f2.parameters()[0].sizes(), at::IntArrayRef({1, 16, 3, 3}));
EXPECT_EQ(f2.parameters()[1].sizes(), at::IntArrayRef({16, 32, 1, 1}));
EXPECT_EQ(f2.parameters()[2].sizes(), at::IntArrayRef({32, 1, 3, 3}));
EXPECT_EQ(f2.parameters()[0].toTensor().sizes(), at::IntArrayRef({1, 16, 3, 3}));
EXPECT_EQ(f2.parameters()[1].toTensor().sizes(), at::IntArrayRef({16, 32, 1, 1}));
EXPECT_EQ(f2.parameters()[2].toTensor().sizes(), at::IntArrayRef({32, 1, 3, 3}));

EXPECT_EQ(f2.memory_plan().buffer_sizes_.size(), 2);
EXPECT_EQ(f2.memory_plan().buffer_sizes_[0], sizeof(float) * 1024);
236 changes: 236 additions & 0 deletions test/mobile/nnc/test_nnc_backend.cpp
@@ -0,0 +1,236 @@
#include <gtest/gtest.h>
#include <torch/csrc/jit/backends/backend.h>
#include <torch/csrc/jit/backends/backend_detail.h>
#include <torch/csrc/jit/backends/backend_preprocess.h>
#include <torch/csrc/jit/frontend/resolver.h>
#include <torch/csrc/jit/mobile/import.h>
#include <torch/csrc/jit/mobile/module.h>
#include <torch/csrc/jit/mobile/nnc/context.h>
#include <torch/csrc/jit/mobile/nnc/registry.h>
#include <torch/custom_class.h>
#include <torch/script.h>
#include <ATen/Functions.h>

namespace torch {
namespace jit {
namespace mobile {
namespace nnc {

namespace {

c10::Dict<c10::IValue, c10::IValue> create_compile_spec(
const std::string& method_name,
const std::string& nnc_kernel_id,
const std::vector<std::vector<int64_t>>& input_shapes,
const std::vector<std::vector<int64_t>>& output_shapes,
const c10::impl::GenericList& parameters,
const std::vector<int64_t>& buffer_sizes) {
c10::Dict<c10::IValue, c10::IValue> method_spec(
c10::StringType::get(), c10::AnyType::get());
method_spec.insert("nnc_kernel_id", nnc_kernel_id);
method_spec.insert("input_sizes", input_shapes);
method_spec.insert("output_sizes", output_shapes);

// For testing purposes we don't call the real NNC compiler, so we pass
// these in directly.
method_spec.insert("parameters", parameters);
method_spec.insert("buffer_sizes", buffer_sizes);

c10::Dict<c10::IValue, c10::IValue> compile_spec(
c10::StringType::get(), c10::AnyType::get());
compile_spec.insert(method_name, method_spec);
return compile_spec;
}

std::vector<mobile::nnc::InputSpec> get_input_specs(
const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec) {
auto input_shapes = method_compile_spec.at("input_sizes").toList();

std::vector<mobile::nnc::InputSpec> specs;
for (const auto& input_shape : input_shapes) {
mobile::nnc::InputSpec spec;
spec.sizes_ = ((c10::IValue) input_shape).toIntVector();
spec.dtype_ = c10::ScalarType::Float;
specs.emplace_back(std::move(spec));
}
return specs;
}

std::vector<mobile::nnc::OutputSpec> get_output_specs(
const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec) {
auto output_shapes = method_compile_spec.at("output_sizes").toList();

std::vector<mobile::nnc::OutputSpec> specs;
for (const auto& output_shape : output_shapes) {
mobile::nnc::OutputSpec spec;
spec.sizes_ = ((c10::IValue) output_shape).toIntVector();
spec.dtype_ = c10::ScalarType::Float;
specs.emplace_back(std::move(spec));
}
return specs;
}

// A fake NNC preprocess method, which only produces the compiled model but
// does not invoke the NNC compiler to generate the assembly.
c10::IValue preprocess(
const torch::jit::Module& /* mod */,
const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec,
const torch::jit::BackendDebugHandleGenerator&) {
torch::jit::mobile::nnc::CompilationUnit cu;
for (const auto& entry : method_compile_spec) {
const std::string& method_name = entry.key().toStringRef();
auto compile_spec = entry.value().toGenericDict();

auto func = std::make_unique<mobile::nnc::Function>();
func->set_name(method_name);
func->set_nnc_kernel_id(compile_spec.at("nnc_kernel_id").toStringRef());
func->set_input_specs(get_input_specs(compile_spec));
func->set_output_specs(get_output_specs(compile_spec));

func->set_parameters(compile_spec.at("parameters").toList());

mobile::nnc::MemoryPlan plan;
plan.buffer_sizes_ = compile_spec.at("buffer_sizes").toIntVector();
func->set_memory_plan(plan);

cu.register_function(std::move(func));
}
return cu.serialize();
}

static auto reg = torch::jit::backend_preprocess_register("nnc", preprocess);

struct FakeTensor : torch::CustomClassHolder {
explicit FakeTensor(std::vector<int64_t> data) : data_(std::move(data)) {}
int64_t get() {
return data_[0];
}
std::vector<int64_t> data_;
};

TORCH_LIBRARY(_TorchScriptTesting, m) {
m.class_<FakeTensor>("_MobileNNCFakeTensor")
.def(torch::init<std::vector<int64_t>>())
.def("get", &FakeTensor::get)
.def_pickle(
[](c10::intrusive_ptr<FakeTensor> self) { // __getstate__
return self->data_;
},
[](std::vector<int64_t> state) { // __setstate__
return c10::make_intrusive<FakeTensor>(std::move(state));
});
}

} // namespace

extern "C" {

// The test kernels are supposed to be generated ahead of time by the NNC
// compiler. For integration-test purposes we wrote them by hand instead.
int add_kernel(void** args) {
// out = input + param
at::Tensor input = at::from_blob(args[0], {4, 4}, at::kFloat);
at::Tensor out = at::from_blob(args[1], {4, 4}, at::kFloat);
at::Tensor param = at::from_blob(args[2], {1}, at::kFloat);
out.copy_(at::add(input, param));
return 0;
}

int fake_tensor_add_kernel(void** args) {
// out = input + param.get()
at::Tensor input = at::from_blob(args[0], {4, 4}, at::kFloat);
at::Tensor out = at::from_blob(args[1], {4, 4}, at::kFloat);
FakeTensor* param = reinterpret_cast<FakeTensor*>(args[2]);
out.copy_(at::add(input, param->get()));
return 0;
}

} // extern "C"

REGISTER_NNC_KERNEL("_add_kernel", add_kernel)
REGISTER_NNC_KERNEL("_fake_tensor_add_kernel", fake_tensor_add_kernel)

TEST(NNCBackendTest, AOTCompileThenExecute) {
torch::jit::Module m("m");
auto param = torch::ones({});
m.register_parameter("param", param, false);
m.define(R"(
def forward(self, input):
return input + self.param
)");

// Run the TorchScript module to get reference result.
std::vector<IValue> inputs;
inputs.emplace_back(2.0 * torch::ones({4, 4}));
auto reference = m.forward(inputs);

// Compile the model with NNC.
auto compile_spec = create_compile_spec(
"forward",
"_add_kernel",
{{4, 4}},
{{4, 4}},
c10::impl::toList(c10::List<at::Tensor>({param})),
{});
auto any_dict_ty =
c10::DictType::create(c10::StringType::get(), c10::AnyType::get());
auto compiled_module = torch::jit::detail::codegen_backend_module(
"nnc", m, compile_spec, any_dict_ty);

// Save the compiled model.
std::stringstream ss;
compiled_module._save_for_mobile(ss);

// Load and run the saved model.
auto loaded_module = _load_for_mobile(ss);
auto result = loaded_module.forward(inputs);
EXPECT_TRUE(result.toTensor().equal(3.0 * torch::ones({4, 4})));
EXPECT_TRUE(result.toTensor().equal(reference.toTensor()));
}

TEST(NNCBackendTest, FakeTensor) {
script::Module m("m");
auto param_cls = getCustomClass(
"__torch__.torch.classes._TorchScriptTesting._MobileNNCFakeTensor");
auto param_value = c10::make_intrusive<FakeTensor>(std::vector<int64_t>({3}));
m.register_attribute("param", param_cls, param_value, false);
m.define(
R"(
def forward(self, input):
return input + self.param.get()
)");

// Run the TorchScript module to get reference result.
std::vector<IValue> inputs;
inputs.emplace_back(2.0 * torch::ones({4, 4}));
auto reference = m.forward(inputs);

// Compile the model with NNC.
auto params = c10::impl::GenericList(c10::AnyType::get());
params.emplace_back(param_value);
auto compile_spec = create_compile_spec(
"forward",
"_fake_tensor_add_kernel",
{{4, 4}},
{{4, 4}},
params,
{});
auto any_dict_ty =
c10::DictType::create(c10::StringType::get(), c10::AnyType::get());
auto compiled_module = torch::jit::detail::codegen_backend_module(
"nnc", m, compile_spec, any_dict_ty);

// Save the compiled model.
std::stringstream ss;
compiled_module._save_for_mobile(ss);

// Load and run the saved model.
auto loaded_module = _load_for_mobile(ss);
auto result = loaded_module.forward(inputs);
EXPECT_TRUE(result.toTensor().equal(5.0 * torch::ones({4, 4})));
EXPECT_TRUE(result.toTensor().equal(reference.toTensor()));
}

} // namespace nnc
} // namespace mobile
} // namespace jit
} // namespace torch
5 changes: 3 additions & 2 deletions tools/build_variables.bzl
@@ -174,6 +174,9 @@ core_sources_full_mobile = [
"torch/csrc/jit/ir/subgraph_matcher.cpp",
"torch/csrc/jit/jit_log.cpp",
"torch/csrc/jit/jit_opt_limit.cpp",
"torch/csrc/jit/mobile/nnc/backend.cpp",
"torch/csrc/jit/mobile/nnc/context.cpp",
"torch/csrc/jit/mobile/nnc/registry.cpp",
"torch/csrc/jit/passes/annotate_warns.cpp",
"torch/csrc/jit/passes/bailout_graph.cpp",
"torch/csrc/jit/passes/batch_mm.cpp",
@@ -451,8 +454,6 @@ libtorch_extra_sources = libtorch_core_jit_sources + [
"torch/csrc/jit/mobile/interpreter.cpp",
"torch/csrc/jit/mobile/model_compatibility.cpp",
"torch/csrc/jit/mobile/module.cpp",
"torch/csrc/jit/mobile/nnc/context.cpp",
"torch/csrc/jit/mobile/nnc/registry.cpp",
"torch/csrc/jit/mobile/observer.cpp",
"torch/csrc/jit/mobile/train/export_data.cpp",
"torch/csrc/jit/mobile/train/optim/sgd.cpp",
1 change: 0 additions & 1 deletion torch/csrc/jit/mobile/nnc/backend.cpp
@@ -2,7 +2,6 @@

#include <torch/csrc/jit/backends/backend.h>
#include <torch/csrc/jit/mobile/nnc/context.h>
#include <torch/script.h>

namespace torch {
namespace jit {