Skip to content

Commit

Permalink
Move reduced ops files into build directory (microsoft#10030)
Browse files Browse the repository at this point in the history
In a reduced ops build, some source files get updated. This change moves the updated files into the build directory. This way, it is easier to simultaneously manage different build directories (with possibly different reduced ops configurations) based on a single source directory.
  • Loading branch information
edgchen1 authored Dec 29, 2021
1 parent a367f06 commit 3bc91c2
Show file tree
Hide file tree
Showing 17 changed files with 174 additions and 184 deletions.
8 changes: 0 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,3 @@ onnxruntime/python/version_info.py
.envrc
.psenvrc
*.csproj.user
# exclude generated reduced kernel registration and type control
onnxruntime/contrib_ops/cpu/cpu_contrib_kernels_reduced_ops.cc
onnxruntime/core/providers/cpu/cpu_execution_provider_reduced_ops.cc
orttraining/orttraining/training_ops/cpu/cpu_training_kernels_reduced_ops.cc
onnxruntime/contrib_ops/cuda/cuda_contrib_kernels_reduced_ops.cc
onnxruntime/core/providers/cuda/cuda_execution_provider_reduced_ops.cc
orttraining/orttraining/training_ops/cuda/cuda_training_kernels_reduced_ops.cc
onnxruntime/core/providers/op_kernel_type_control_overrides_reduced_types.inc
4 changes: 0 additions & 4 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handlin
option(onnxruntime_EXTENDED_MINIMAL_BUILD "onnxruntime_MINIMAL_BUILD with support for execution providers that compile kernels." OFF)
option(onnxruntime_MINIMAL_BUILD_CUSTOM_OPS "Add custom operator kernels support to a minimal build." OFF)
option(onnxruntime_REDUCED_OPS_BUILD "Reduced set of kernels are registered in build via modification of the kernel registration source files." OFF)
option(onnxruntime_REDUCED_OP_TYPE_SUPPORT "Limit the types individual operators support where possible to further reduce the build size." OFF)
option(onnxruntime_DISABLE_EXTERNAL_INITIALIZERS "Don't allow models to load external data" OFF)
cmake_dependent_option(onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION
"Enable runtime graph optimization of ORT format models. Warning: Not yet ready for general use."
Expand Down Expand Up @@ -390,9 +389,6 @@ endif()

if (onnxruntime_REDUCED_OPS_BUILD)
add_compile_definitions(REDUCED_OPS_BUILD)
if (onnxruntime_REDUCED_OP_TYPE_SUPPORT)
add_compile_definitions(REDUCED_OP_TYPE_SUPPORT)
endif()
endif()

if (onnxruntime_DISABLE_EXTERNAL_INITIALIZERS)
Expand Down
78 changes: 71 additions & 7 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,64 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Reduced ops build helpers

# In a reduced ops build, the reduction is performed by updating source files.
# Rather than modifying the source files directly, updated versions will be
# saved to another location in the build directory: ${op_reduction_root}.
#
# Both helper functions below read this variable: one to compute the paths of
# the replacement source files, the other to compute the include directories
# that should take precedence over the original source tree.
set(op_reduction_root "${CMAKE_BINARY_DIR}/op_reduction.generated")

# Redirects reduced ops sources to their generated copies.
#
# `all_srcs` is the NAME of a source list variable in the caller's scope. For
# each file in the fixed list below whose path occurs in that list, the path is
# rewritten to the same repo-relative location under ${op_reduction_root}. The
# rewritten list is then written back to the caller's variable.
function(substitute_op_reduction_srcs all_srcs)
  # Sources for which a reduced ops build may supply an updated version.
  set(reducible_srcs
    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/cpu_contrib_kernels.cc"
    "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/cuda_contrib_kernels.cc"
    "${ONNXRUNTIME_ROOT}/core/providers/cpu/cpu_execution_provider.cc"
    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_execution_provider.cc"
    "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/cpu_training_kernels.cc"
    "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/cuda_training_kernels.cc"
  )

  # Generated paths that were actually swapped in; grouped via source_group() below.
  set(generated_srcs)

  foreach(reducible_src IN LISTS reducible_srcs)
    # Substring search over the whole (semicolon-joined) list value.
    string(FIND "${${all_srcs}}" "${reducible_src}" match_pos)
    if(NOT match_pos EQUAL "-1")
      # Mirror the file's repo-relative path under the generated root.
      file(RELATIVE_PATH repo_relative_src "${REPO_ROOT}" "${reducible_src}")
      set(generated_src "${op_reduction_root}/${repo_relative_src}")

      message("File '${reducible_src}' substituted with reduced op version '${generated_src}'.")

      # Rewrite the function-local copy of the list; it is published at the end.
      string(REPLACE "${reducible_src}" "${generated_src}" ${all_srcs} "${${all_srcs}}")
      list(APPEND generated_srcs "${generated_src}")
    endif()
  endforeach()

  # Only declare a source group when at least one file was substituted.
  if(generated_srcs)
    source_group(TREE "${op_reduction_root}" PREFIX "op_reduction.generated" FILES ${generated_srcs})
  endif()

  # Hand the updated list back to the caller.
  set(${all_srcs} "${${all_srcs}}" PARENT_SCOPE)
endfunction()

# Puts the generated reduced ops directories at the front of `target`'s private
# include search path.
#
# ${op_reduction_root}/onnxruntime is always added; ${op_reduction_root}/orttraining
# is added after it when either training option is enabled.
function(add_op_reduction_include_dirs target)
  if(onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS)
    set(generated_include_dirs
      "${op_reduction_root}/onnxruntime"
      "${op_reduction_root}/orttraining"
    )
  else()
    set(generated_include_dirs "${op_reduction_root}/onnxruntime")
  endif()

  # BEFORE prepends these directories so that they are searched first and the
  # op reduction file paths win over any same-named path found later.
  target_include_directories(${target} BEFORE PRIVATE ${generated_include_dirs})
endfunction()


file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc"
Expand Down Expand Up @@ -45,16 +103,10 @@ file(GLOB_RECURSE onnxruntime_rocm_generated_contrib_ops_cu_srcs CONFIGURE_DEPEN
"${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime/contrib_ops/rocm/*.cuh"
)


file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
# If we are building with reduced number of kernel registration and types,
# "core/providers/op_kernel_type_control_overrides_reduced_types.inc"
# will be generated with type specifications code.
# For simplicity, we include both .inc files,
# see onnxruntime/core/providers/op_kernel_type_control.h
"${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides*.inc"
"${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
)

if(onnxruntime_USE_NUPHAR)
Expand Down Expand Up @@ -176,7 +228,13 @@ if (onnxruntime_ENABLE_TRAINING)
list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
endif()

if (onnxruntime_REDUCED_OPS_BUILD)
substitute_op_reduction_srcs(onnxruntime_providers_src)
endif()
onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src})
if (onnxruntime_REDUCED_OPS_BUILD)
add_op_reduction_include_dirs(onnxruntime_providers)
endif()

if (MSVC)
target_compile_options(onnxruntime_providers PRIVATE "/bigobj")
Expand Down Expand Up @@ -323,7 +381,13 @@ if (onnxruntime_USE_CUDA)
list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
endif()

if (onnxruntime_REDUCED_OPS_BUILD)
substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
endif()
onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src})
if (onnxruntime_REDUCED_OPS_BUILD)
add_op_reduction_include_dirs(onnxruntime_providers_cuda)
endif()

#target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler \"/analyze:stacksize 131072\">")
if (HAS_GUARD_CF)
Expand Down
16 changes: 11 additions & 5 deletions docs/Reduced_Operator_Kernel_build.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,23 @@ In order to reduce the compiled binary size of ONNX Runtime (ORT), the operator

A configuration file must be created with details of the kernels that are required.

Following that, ORT must be manually built, providing the configuration file in the `--include_ops_by_config` parameter. The build process will update the ORT kernel registration source files to exclude the unused kernels.
Following that, ORT must be manually built, providing the configuration file in the [build.py](../tools/ci_build/build.py) `--include_ops_by_config` argument.

See the [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#build-instructions) for more details on building ORT.

When building ORT with a reduced set of kernel registrations, `--skip_tests` **MUST** be specified as the kernel reduction will render many of the unit tests invalid.
The build process will generate updated ORT kernel registration and type reduction source files to exclude unused kernel implementations.
The generated files will be under the build directory and the original source files that they are based on are not directly modified.
When building, the generated files will be used instead of the original files.

NOTE: The operator exclusion logic when building with an operator reduction configuration file will only disable kernel registrations each time it runs. It will NOT re-enable previously disabled kernels. If you wish to change the list of kernels included, it is best to revert the repository to a clean state (e.g. via `git reset --hard`) before building ORT again.
The operator exclusion logic only runs during the build file generation (or "update") phase of the build process, i.e., when invoking build.py with no build phase arguments or explicitly with `--update`.

Note: It is also possible to run the operator exclusion logic independently with [reduce_op_kernels.py](../tools/ci_build/reduce_op_kernels.py). This may be useful when building ORT without using build.py.
As the generated files will go into a build directory, the build directory must be provided with the reduce_op_kernels.py `--cmake_build_dir` argument.
Note that this argument is slightly different from the build.py `--build_dir` argument - build.py will append an additional directory for the build configuration to its `--build_dir` value to get the equivalent of `--cmake_build_dir`.

## Creating a configuration file with the required kernels

The script in `<ORT Root>/tools/python/create_reduced_build_config.py` should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models.
The [create_reduced_build_config.py](../tools/python/create_reduced_build_config.py) script should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models.

```
create_reduced_build_config.py --help
Expand All @@ -35,7 +41,7 @@ optional arguments:

### Type reduction

If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if `--enable_type_reduction` is specified. This can be used to further reduce the build size if `--enable_reduced_operator_type_support` is specified when building ORT.
If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if the `--enable_type_reduction` argument is specified. This can be used to further reduce the build size if the build.py `--enable_reduced_operator_type_support` argument is specified when building ORT.

ONNX format models are not guaranteed to include the required per-node type information, so cannot be used with this option.

Expand Down
12 changes: 0 additions & 12 deletions onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// If we are building with reduced number of kernel registration,
// this file will be copied to <file_name>_reduced_ops.cc,
// where the unused kernel registration will be commented out
// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
// This will prevent,
// 1. Accidental commit of the reduced kernel registration files
// 2. If the required ops config has changed, user has to revert the changes to
// the kernel registration files
#ifndef REDUCED_OPS_BUILD

#include "contrib_ops/cpu/cpu_contrib_kernels.h"
#include "core/graph/constants.h"
#include "core/mlas/inc/mlas.h"
Expand Down Expand Up @@ -278,5 +268,3 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {

} // namespace contrib
} // namespace onnxruntime

#endif // #ifndef REDUCED_OPS_BUILD
12 changes: 0 additions & 12 deletions onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// If we are building with reduced number of kernel registration,
// this file will be copied to <file_name>_reduced_ops.cc,
// where the unused kernel registration will be commented out
// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
// This will prevent,
// 1. Accidental commit of the reduced kernel registration files
// 2. If the required ops config has changed, user has to revert the changes to
// the kernel registration files
#ifndef REDUCED_OPS_BUILD

#include "core/providers/shared_library/provider_api.h"
#include "core/providers/cuda/cuda_common.h"

Expand Down Expand Up @@ -214,5 +204,3 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
} // namespace cuda
} // namespace contrib
} // namespace onnxruntime

#endif // #ifndef REDUCED_OPS_BUILD
12 changes: 0 additions & 12 deletions onnxruntime/core/providers/cpu/cpu_execution_provider.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// If we are building with reduced number of kernel registration,
// this file will be copied to <file_name>_reduced_ops.cc,
// where the unused kernel registration will be commented out
// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
// This will prevent,
// 1. Accidental commit of the reduced kernel registration files
// 2. If the required ops config has changed, user has to revert the changes to
// the kernel registration files
#ifndef REDUCED_OPS_BUILD

#include "core/providers/cpu/cpu_execution_provider.h"
#include "core/framework/op_kernel.h"
#include "core/framework/kernel_registry.h"
Expand Down Expand Up @@ -2074,5 +2064,3 @@ std::unique_ptr<IDataTransfer> CPUExecutionProvider::GetDataTransfer() const {
return std::make_unique<CPUDataTransfer>();
}
} // namespace onnxruntime

#endif // #ifndef REDUCED_OPS_BUILD
12 changes: 0 additions & 12 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// If we are building with reduced number of kernel registration,
// this file will be copied to <file_name>_reduced_ops.cc,
// where the unused kernel registration will be commented out
// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
// This will prevent,
// 1. Accidental commit of the reduced kernel registration files
// 2. If the required ops config has changed, user has to revert the changes to
// the kernel registration files
#ifndef REDUCED_OPS_BUILD

#include "core/providers/shared_library/provider_api.h"
#include "core/providers/cuda/cuda_execution_provider.h"
#include "core/providers/cuda/cuda_common.h"
Expand Down Expand Up @@ -2363,5 +2353,3 @@ void CUDAExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManager>
}

} // namespace onnxruntime

#endif // #ifndef REDUCED_OPS_BUILD
13 changes: 0 additions & 13 deletions onnxruntime/core/providers/op_kernel_type_control.h
Original file line number Diff line number Diff line change
Expand Up @@ -469,17 +469,4 @@ struct EnabledTypes {
#include "core/framework/data_types.h" // for types that might be used in type specifications

// all allowed type specifications should be contained in the following file

// If we are building with reduced number of kernel registration and types
// <op_kernel_type_control_overrides.inc> will be copied to
// <core/providers/op_kernel_type_control_overrides_reduced_types.inc>,
// where the type specifications code will be inserted,
// This will prevent,
// 1. Accidental commit of the modified <op_kernel_type_control_overrides.inc>
// 2. If the required ops and types config has changed, user has to revert the changes to
// <op_kernel_type_control_overrides.inc>
#ifndef REDUCED_OP_TYPE_SUPPORT
#include "core/providers/op_kernel_type_control_overrides.inc"
#else
#include "core/providers/op_kernel_type_control_overrides_reduced_types.inc"
#endif
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#if !defined(REDUCED_OPS_BUILD) // may not work with excluded op kernel implementations

#include "core/common/logging/logging.h"
#include "core/framework/compute_capability.h"
#include "core/framework/utils.h"
Expand Down Expand Up @@ -342,3 +344,5 @@ TEST(InternalTestingEP, DISABLED_TestNnapiPartitioningMlPerfModels) {

} // namespace test
} // namespace onnxruntime

#endif // !defined(REDUCED_OPS_BUILD)
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#if !defined(REDUCED_OPS_BUILD) // may not work with excluded op kernel implementations

#include "core/common/logging/logging.h"
#include "core/framework/utils.h"
#include "core/session/inference_session.h"
Expand Down Expand Up @@ -354,3 +356,5 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) {
}
} // namespace test
} // namespace onnxruntime

#endif // !defined(REDUCED_OPS_BUILD)
12 changes: 0 additions & 12 deletions orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// If we are building with reduced number of kernel registration,
// this file will be copied to <file_name>_reduced_ops.cc,
// where the unused kernel registration will be commented out
// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
// This will prevent,
// 1. Accidental commit of the reduced kernel registration files
// 2. If the required ops config has changed, user has to revert the changes to
// the kernel registration files
#ifndef REDUCED_OPS_BUILD

#include "orttraining/training_ops/cpu/cpu_training_kernels.h"
#include "core/graph/constants.h"

Expand Down Expand Up @@ -244,5 +234,3 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) {

} // namespace contrib
} // namespace onnxruntime

#endif // #ifndef REDUCED_OPS_BUILD
12 changes: 0 additions & 12 deletions orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// If we are building with reduced number of kernel registration,
// this file will be copied to <file_name>_reduced_ops.cc,
// where the unused kernel registration will be commented out
// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
// This will prevent,
// 1. Accidental commit of the reduced kernel registration files
// 2. If the required ops config has changed, user has to revert the changes to
// the kernel registration files
#ifndef REDUCED_OPS_BUILD

#include "core/providers/shared_library/provider_api.h"
#include "core/providers/cuda/cuda_fwd.h"
#include "core/providers/cuda/cuda_pch.h"
Expand Down Expand Up @@ -468,5 +458,3 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {

} // namespace cuda
} // namespace onnxruntime

#endif // #ifndef REDUCED_OPS_BUILD
Loading

0 comments on commit 3bc91c2

Please sign in to comment.