Move reduced ops files into build directory (microsoft#10030)

In a reduced ops build, some source files get updated. This change moves the updated files into the build directory. This way, it is easier to simultaneously manage different build directories (with possibly different reduced ops configurations) based on a single source directory.
garymm · Dec 29, 2021 · 3bc91c2 · 3bc91c2
1 parent a367f06
commit 3bc91c2
Show file tree

Hide file tree

Showing 17 changed files with 174 additions and 184 deletions.
diff --git a/.gitignore b/.gitignore
@@ -60,11 +60,3 @@ onnxruntime/python/version_info.py
 .envrc
 .psenvrc
 *.csproj.user
-# exclude generated reduced kernel registration and type control
-onnxruntime/contrib_ops/cpu/cpu_contrib_kernels_reduced_ops.cc
-onnxruntime/core/providers/cpu/cpu_execution_provider_reduced_ops.cc
-orttraining/orttraining/training_ops/cpu/cpu_training_kernels_reduced_ops.cc
-onnxruntime/contrib_ops/cuda/cuda_contrib_kernels_reduced_ops.cc
-onnxruntime/core/providers/cuda/cuda_execution_provider_reduced_ops.cc
-orttraining/orttraining/training_ops/cuda/cuda_training_kernels_reduced_ops.cc
-onnxruntime/core/providers/op_kernel_type_control_overrides_reduced_types.inc
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -118,7 +118,6 @@ cmake_dependent_option(onnxruntime_DISABLE_EXCEPTIONS "Disable exception handlin
 option(onnxruntime_EXTENDED_MINIMAL_BUILD "onnxruntime_MINIMAL_BUILD with support for execution providers that compile kernels." OFF)
 option(onnxruntime_MINIMAL_BUILD_CUSTOM_OPS "Add custom operator kernels support to a minimal build." OFF)
 option(onnxruntime_REDUCED_OPS_BUILD "Reduced set of kernels are registered in build via modification of the kernel registration source files." OFF)
-option(onnxruntime_REDUCED_OP_TYPE_SUPPORT "Limit the types individual operators support where possible to further reduce the build size." OFF)
 option(onnxruntime_DISABLE_EXTERNAL_INITIALIZERS "Don't allow models to load external data" OFF)
 cmake_dependent_option(onnxruntime_ENABLE_ORT_FORMAT_RUNTIME_GRAPH_OPTIMIZATION
                        "Enable runtime graph optimization of ORT format models. Warning: Not yet ready for general use."
@@ -390,9 +389,6 @@ endif()
 
 if (onnxruntime_REDUCED_OPS_BUILD)
   add_compile_definitions(REDUCED_OPS_BUILD)
-  if (onnxruntime_REDUCED_OP_TYPE_SUPPORT)
-    add_compile_definitions(REDUCED_OP_TYPE_SUPPORT)
-  endif()
 endif()
 
 if (onnxruntime_DISABLE_EXTERNAL_INITIALIZERS)

diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
@@ -1,6 +1,64 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+# Reduced ops build helpers
+
+# In a reduced ops build, the reduction is performed by updating source files.
+# Rather than modifying the source files directly, updated versions will be
+# saved to another location in the build directory: ${op_reduction_root}.
+set(op_reduction_root "${CMAKE_BINARY_DIR}/op_reduction.generated")
+
+# Swaps source files that a reduced ops build regenerates for their generated
+# counterparts under ${op_reduction_root}.
+#
+# Arguments:
+#   all_srcs - name of a list variable in the caller's scope that holds source
+#              file paths; the variable is updated in the caller's scope.
+#
+# Each generated path mirrors the original file's path relative to
+# ${REPO_ROOT}. Paths are compared as whole list elements, so an entry that
+# merely contains one of the original paths as a substring is left untouched.
+function(substitute_op_reduction_srcs all_srcs)
+  # files that are potentially updated in a reduced ops build
+  set(original_srcs
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/cpu_contrib_kernels.cc"
+    "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/cuda_contrib_kernels.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cpu/cpu_execution_provider.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_execution_provider.cc"
+    "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/cpu_training_kernels.cc"
+    "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/cuda_training_kernels.cc"
+    )
+
+  set(replacement_srcs)
+
+  foreach(original_src IN LISTS original_srcs)
+    # exact element lookup; idx is -1 when the file is not in the caller's list
+    list(FIND ${all_srcs} "${original_src}" idx)
+    if(idx EQUAL -1)
+      continue()
+    endif()
+
+    file(RELATIVE_PATH src_relative_path "${REPO_ROOT}" "${original_src}")
+    set(replacement_src "${op_reduction_root}/${src_relative_path}")
+
+    message("File '${original_src}' substituted with reduced op version '${replacement_src}'.")
+
+    # Replace every exact occurrence in place. The replacement is inserted
+    # before the original is removed so the list is never empty in between.
+    while(NOT idx EQUAL -1)
+      list(INSERT ${all_srcs} ${idx} "${replacement_src}")
+      math(EXPR original_idx "${idx} + 1")
+      list(REMOVE_AT ${all_srcs} ${original_idx})
+      list(FIND ${all_srcs} "${original_src}" idx)
+    endwhile()
+
+    list(APPEND replacement_srcs "${replacement_src}")
+  endforeach()
+
+  if(replacement_srcs)
+    # give the generated files their own source group, rooted at ${op_reduction_root}
+    source_group(TREE "${op_reduction_root}" PREFIX "op_reduction.generated" FILES ${replacement_srcs})
+  endif()
+
+  # publish the updated list back to the caller
+  set(${all_srcs} "${${all_srcs}}" PARENT_SCOPE)
+endfunction()
+
+# Prepends the reduced ops build include directories to the private include
+# path of `target`.
+#
+# Arguments:
+#   target - name of the target to update
+function(add_op_reduction_include_dirs target)
+  # the generated onnxruntime tree is always on the include path
+  set(generated_include_dirs)
+  list(APPEND generated_include_dirs "${op_reduction_root}/onnxruntime")
+
+  # the generated orttraining tree is only added when training or training ops are enabled
+  if(onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_TRAINING_OPS)
+    list(APPEND generated_include_dirs "${op_reduction_root}/orttraining")
+  endif()
+
+  # BEFORE places these directories ahead of the existing ones so they are
+  # searched first, giving op reduction file paths precedence.
+  target_include_directories(
+    ${target}
+    BEFORE PRIVATE
+    ${generated_include_dirs}
+  )
+endfunction()
+
+
 file(GLOB_RECURSE onnxruntime_providers_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.h"
   "${ONNXRUNTIME_ROOT}/core/providers/cpu/*.cc"
@@ -45,16 +103,10 @@ file(GLOB_RECURSE onnxruntime_rocm_generated_contrib_ops_cu_srcs CONFIGURE_DEPEN
   "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime/contrib_ops/rocm/*.cuh"
 )
 
-
 file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/core/providers/*.h"
   "${ONNXRUNTIME_ROOT}/core/providers/*.cc"
-  # If we are building with reduced number of kernel registration and types,
-  # "core/providers/op_kernel_type_control_overrides_reduced_types.inc"
-  # will be generated with type specifications code.
-  # For simplicity, we inlcude both .inc files,
-  # see onnxruntime/core/providers/op_kernel_type_control.h
-  "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides*.inc"
+  "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc"
 )
 
 if(onnxruntime_USE_NUPHAR)
@@ -176,7 +228,13 @@ if (onnxruntime_ENABLE_TRAINING)
   list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
 endif()
 
+if (onnxruntime_REDUCED_OPS_BUILD)
+  substitute_op_reduction_srcs(onnxruntime_providers_src)
+endif()
 onnxruntime_add_static_library(onnxruntime_providers ${onnxruntime_providers_src})
+if (onnxruntime_REDUCED_OPS_BUILD)
+  add_op_reduction_include_dirs(onnxruntime_providers)
+endif()
 
 if (MSVC)
    target_compile_options(onnxruntime_providers PRIVATE "/bigobj")
@@ -323,7 +381,13 @@ if (onnxruntime_USE_CUDA)
     list(APPEND onnxruntime_providers_cuda_src ${onnxruntime_cuda_training_ops_cc_srcs} ${onnxruntime_cuda_training_ops_cu_srcs})
   endif()
 
+  if (onnxruntime_REDUCED_OPS_BUILD)
+    substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
+  endif()
   onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src})
+  if (onnxruntime_REDUCED_OPS_BUILD)
+    add_op_reduction_include_dirs(onnxruntime_providers_cuda)
+  endif()
 
   #target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler \"/analyze:stacksize 131072\">")
   if (HAS_GUARD_CF)

diff --git a/docs/Reduced_Operator_Kernel_build.md b/docs/Reduced_Operator_Kernel_build.md
@@ -4,17 +4,23 @@ In order to reduce the compiled binary size of ONNX Runtime (ORT), the operator
 
 A configuration file must be created with details of the kernels that are required.
 
-Following that, ORT must be manually built, providing the configuration file in the `--include_ops_by_config` parameter. The build process will update the ORT kernel registration source files to exclude the unused kernels.
+Following that, ORT must be manually built, providing the configuration file in the [build.py](../tools/ci_build/build.py) `--include_ops_by_config` argument.
 
 See the [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#build-instructions) for more details on building ORT.
 
-When building ORT with a reduced set of kernel registrations, `--skip_tests` **MUST** be specified as the kernel reduction will render many of the unit tests invalid.
+The build process will generate updated ORT kernel registration and type reduction source files to exclude unused kernel implementations.
+The generated files will be under the build directory and the original source files that they are based on are not directly modified.
+When building, the generated files will be used instead of the original files.
 
-NOTE: The operator exclusion logic when building with an operator reduction configuration file will only disable kernel registrations each time it runs. It will NOT re-enable previously disabled kernels. If you wish to change the list of kernels included, it is best to revert the repository to a clean state (e.g. via `git reset --hard`) before building ORT again.
+The operator exclusion logic only runs during the build file generation (or "update") phase of the build process, i.e., when invoking build.py with no build phase arguments or explicitly with `--update`.
+
+Note: It is also possible to run the operator exclusion logic independently with [reduce_op_kernels.py](../tools/ci_build/reduce_op_kernels.py). This may be useful when building ORT without using build.py.
+As the generated files will go into a build directory, the build directory must be provided with the reduce_op_kernels.py `--cmake_build_dir` argument.
+Note that this argument is slightly different from the build.py `--build_dir` argument - build.py will append an additional directory for the build configuration to its `--build_dir` value to get the equivalent of `--cmake_build_dir`.
 
 ## Creating a configuration file with the required kernels
 
-The script in `<ORT Root>/tools/python/create_reduced_build_config.py` should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models.
+The [create_reduced_build_config.py](../tools/python/create_reduced_build_config.py) script should be used to create the configuration file. This file can be manually edited as needed. The configuration can be created from either ONNX or ORT format models.
 
 ```
 create_reduced_build_config.py --help
@@ -35,7 +41,7 @@ optional arguments:
 
 ### Type reduction
 
-If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if `--enable_type_reduction` is specified. This can be used to further reduce the build size if `--enable_reduced_operator_type_support` is specified when building ORT.
+If the configuration file is created using ORT format models, the input/output types that individual operators require can be tracked if the `--enable_type_reduction` argument is specified. This can be used to further reduce the build size if the build.py `--enable_reduced_operator_type_support` argument is specified when building ORT.
 
 ONNX format models are not guaranteed to include the required per-node type information, so cannot be used with this option.
 

diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "contrib_ops/cpu/cpu_contrib_kernels.h"
 #include "core/graph/constants.h"
 #include "core/mlas/inc/mlas.h"
@@ -278,5 +268,3 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
 
 }  // namespace contrib
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc b/onnxruntime/contrib_ops/cuda/cuda_contrib_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_common.h"
 
@@ -214,5 +204,3 @@ Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
 }  // namespace cuda
 }  // namespace contrib
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/cpu/cpu_execution_provider.h"
 #include "core/framework/op_kernel.h"
 #include "core/framework/kernel_registry.h"
@@ -2074,5 +2064,3 @@ std::unique_ptr<IDataTransfer> CPUExecutionProvider::GetDataTransfer() const {
   return std::make_unique<CPUDataTransfer>();
 }
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_execution_provider.h"
 #include "core/providers/cuda/cuda_common.h"
@@ -2363,5 +2353,3 @@ void CUDAExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManager>
 }
 
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/onnxruntime/core/providers/op_kernel_type_control.h b/onnxruntime/core/providers/op_kernel_type_control.h
@@ -469,17 +469,4 @@ struct EnabledTypes {
 #include "core/framework/data_types.h"  // for types that might be used in type specifications
 
 // all allowed type specifications should be contained in the following file
-
-// If we are building with reduced number of kernel registration and types
-// <op_kernel_type_control_overrides.inc> will be copied to
-// <core/providers/op_kernel_type_control_overrides_reduced_types.inc>,
-// where the type specifications code will be inserted,
-// This will prevent,
-// 1. Accidental commit of the modified <op_kernel_type_control_overrides.inc>
-// 2. If the required ops and types config has changed, user has to revert the changes to
-//    <op_kernel_type_control_overrides.inc>
-#ifndef REDUCED_OP_TYPE_SUPPORT
 #include "core/providers/op_kernel_type_control_overrides.inc"
-#else
-#include "core/providers/op_kernel_type_control_overrides_reduced_types.inc"
-#endif
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#if !defined(REDUCED_OPS_BUILD)  // may not work with excluded op kernel implementations
+
 #include "core/common/logging/logging.h"
 #include "core/framework/compute_capability.h"
 #include "core/framework/utils.h"
@@ -342,3 +344,5 @@ TEST(InternalTestingEP, DISABLED_TestNnapiPartitioningMlPerfModels) {
 
 }  // namespace test
 }  // namespace onnxruntime
+
+#endif  // !defined(REDUCED_OPS_BUILD)
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#if !defined(REDUCED_OPS_BUILD)  // may not work with excluded op kernel implementations
+
 #include "core/common/logging/logging.h"
 #include "core/framework/utils.h"
 #include "core/session/inference_session.h"
@@ -354,3 +356,5 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) {
 }
 }  // namespace test
 }  // namespace onnxruntime
+
+#endif  // !defined(REDUCED_OPS_BUILD)
diff --git a/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc b/orttraining/orttraining/training_ops/cpu/cpu_training_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "orttraining/training_ops/cpu/cpu_training_kernels.h"
 #include "core/graph/constants.h"
 
@@ -244,5 +234,3 @@ Status RegisterCpuTrainingKernels(KernelRegistry& kernel_registry) {
 
 }  // namespace contrib
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD
diff --git a/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc b/orttraining/orttraining/training_ops/cuda/cuda_training_kernels.cc
@@ -1,16 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-// If we are building with reduced number of kernel registration,
-// this file will be copied to <file_name>_reduced_ops.cc,
-// where the unused kernel registration will be commented out
-// and the "#ifndef REDUCED_OPS_BUILD" be replaced with "#ifdef REDUCED_OPS_BUILD"
-// This will prevent,
-// 1. Accidental commit of the reduced kernel registration files
-// 2. If the required ops config has changed, user has to revert the changes to
-//    the kernel registration files
-#ifndef REDUCED_OPS_BUILD
-
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_fwd.h"
 #include "core/providers/cuda/cuda_pch.h"
@@ -468,5 +458,3 @@ Status RegisterCudaTrainingKernels(KernelRegistry& kernel_registry) {
 
 }  // namespace cuda
 }  // namespace onnxruntime
-
-#endif  // #ifndef REDUCED_OPS_BUILD