Skip to content

Commit

Permalink
Enable proper override using MIMalloc (microsoft#9944)
Browse files Browse the repository at this point in the history
Redirect memory allocations to MiMalloc and advance its version to v2.0.3
Refactor for a universal ifdef
  • Loading branch information
yuslepukhin authored Dec 8, 2021
1 parent b34b991 commit a7f649d
Show file tree
Hide file tree
Showing 36 changed files with 167 additions and 282 deletions.
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ option(onnxruntime_ARMNN_RELU_USE_CPU "Use the CPU implementation for the Relu o
option(onnxruntime_ARMNN_BN_USE_CPU "Use the CPU implementation for the Batch Normalization operator for the ArmNN EP" ON)
option(onnxruntime_ENABLE_INSTRUMENT "Enable Instrument with Event Tracing for Windows (ETW)" OFF)
option(onnxruntime_USE_TELEMETRY "Build with Telemetry" OFF)
option(onnxruntime_USE_MIMALLOC "Override new/delete and arena allocator with mimalloc" OFF)
#The onnxruntime_PREFER_SYSTEM_LIB is mainly designed for package managers like apt/yum/vcpkg.
#Please note, by default Protobuf_USE_STATIC_LIBS is OFF but it's recommended to turn it ON on Windows. You should set it properly when onnxruntime_PREFER_SYSTEM_LIB is ON otherwise you'll hit linkage errors.
#If you have already installed protobuf(or the others) in your system at the default system paths(like /usr/include), then it's better to set onnxruntime_PREFER_SYSTEM_LIB ON. Otherwise onnxruntime may see two different protobuf versions and we won't know which one will be used, the worst case could be onnxruntime picked up header files from one of them but the binaries from the other one.
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/mimalloc
Submodule mimalloc updated 139 files
8 changes: 2 additions & 6 deletions cmake/external/mimalloc.cmake
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
set(mimalloc_root_dir ${PROJECT_SOURCE_DIR}/external/mimalloc)

if(onnxruntime_USE_MIMALLOC_STL_ALLOCATOR)
add_definitions(-DUSE_MIMALLOC_STL_ALLOCATOR) # used in ONNXRuntime
endif()
if(onnxruntime_USE_MIMALLOC_ARENA_ALLOCATOR)
add_definitions(-DUSE_MIMALLOC_ARENA_ALLOCATOR) # used in ONNXRuntime
endif()
add_definitions(-DUSE_MIMALLOC)
include_directories(${mimalloc_root_dir}/include)

option(MI_OVERRIDE "" OFF)
option(MI_BUILD_TESTS "" OFF)
option(MI_DEBUG_FULL "" OFF)

add_subdirectory(${mimalloc_root_dir} EXCLUDE_FROM_ALL)
set_target_properties(mimalloc-static PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
Expand Down
23 changes: 17 additions & 6 deletions cmake/onnxruntime_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ file(GLOB onnxruntime_common_src CONFIGURE_DEPENDS
${onnxruntime_common_src_patterns}
)

# Remove the new/delete intercept (the Windows debug_alloc sources) from
# onnxruntime_common when mimalloc is enabled on Windows.
# To deal with memory leaks, use either a non-mimalloc build or mimalloc's
# built-in features.
if(WIN32 AND onnxruntime_USE_MIMALLOC)
list(REMOVE_ITEM onnxruntime_common_src
"${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.cc"
"${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.h")
endif()

source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_common_src})

onnxruntime_add_static_library(onnxruntime_common ${onnxruntime_common_src})
Expand All @@ -83,17 +91,20 @@ if (onnxruntime_USE_TELEMETRY)
set_target_properties(onnxruntime_common PROPERTIES COMPILE_FLAGS "/FI${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows/TraceLoggingConfigPrivate.h")
endif()

if (onnxruntime_USE_MIMALLOC_STL_ALLOCATOR OR onnxruntime_USE_MIMALLOC_ARENA_ALLOCATOR)
if (onnxruntime_USE_MIMALLOC)
if(NOT WIN32)
  # FATAL_ERROR is the message() mode that stops configuration. A bare FATAL is
  # not a mode keyword: it would be printed as part of the text and the build
  # would carry on. The text is also corrected, since this guard is about the
  # platform; GPU builds are handled by the check that follows.
  message(FATAL_ERROR "Currently mimalloc is supported only in Windows builds")
endif()
if(onnxruntime_USE_CUDA OR onnxruntime_USE_OPENVINO)
message(WARNING "Ignoring directive to use mimalloc on unimplemented targets")
elseif (${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
# Some of the non-windows targets see strange runtime failures
message(WARNING "Ignoring request to link to mimalloc - only windows supported")
message(WARNING "Currently do not support MIMALLOC in GPU builds")
else()
include(external/mimalloc.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES mimalloc-static)
list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES mimalloc-static)
target_link_libraries(onnxruntime_common mimalloc-static)
set(onnxruntime_mimalloc_shim_src "${ONNXRUNTIME_ROOT}/core/platform/windows/mimalloc/mimalloc_overloads.cc")
add_library(onnxruntime_mimalloc_shim ${onnxruntime_mimalloc_shim_src})
target_link_libraries(onnxruntime_mimalloc_shim mimalloc-static)
target_link_libraries(onnxruntime_common onnxruntime_mimalloc_shim)
endif()
endif()

Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_framework.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ if (onnxruntime_ENABLE_TRAINING)
target_include_directories(onnxruntime_framework PRIVATE ${DLPACK_INCLUDE_DIR})
endif()
onnxruntime_add_include_to_target(onnxruntime_framework onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} flatbuffers)

# Link mimalloc into the framework only in configurations where the
# mimalloc-static target exists. onnxruntime_common.cmake includes
# external/mimalloc.cmake (which creates the target) only when neither CUDA nor
# OpenVINO is enabled, so the same condition is mirrored here. Otherwise
# "mimalloc-static" would be passed to the linker as a plain library name.
if (onnxruntime_USE_MIMALLOC AND NOT (onnxruntime_USE_CUDA OR onnxruntime_USE_OPENVINO))
  target_link_libraries(onnxruntime_framework mimalloc-static)
endif()

set_target_properties(onnxruntime_framework PROPERTIES FOLDER "ONNXRuntime")
# need onnx to build to create headers that this project includes
add_dependencies(onnxruntime_framework ${onnxruntime_EXTERNAL_DEPENDENCIES})
Expand Down
4 changes: 0 additions & 4 deletions include/onnxruntime/core/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@
#include "core/common/make_string.h"
#include "core/common/status.h"

#ifdef USE_MIMALLOC_ARENA_ALLOCATOR
#include <mimalloc.h>
#endif

namespace onnxruntime {

using TimePoint = std::chrono::high_resolution_clock::time_point;
Expand Down
21 changes: 3 additions & 18 deletions include/onnxruntime/core/framework/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,24 +183,9 @@ class CPUAllocator : public IAllocator {
void Free(void* p) override;
};

#if defined(USE_MIMALLOC_ARENA_ALLOCATOR)
class MiMallocAllocator : public IAllocator {
public:
explicit MiMallocAllocator(const OrtMemoryInfo& memory_info) : IAllocator(memory_info) {}
MiMallocAllocator() : IAllocator(OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator)) {}

void* Alloc(size_t size) override;
void Free(void* p) override;
};

#endif

#if defined(USE_MIMALLOC_ARENA_ALLOCATOR)
using TAllocator = MiMallocAllocator;
#else
using TAllocator = CPUAllocator;
#endif

using AllocatorPtr = std::shared_ptr<IAllocator>;

void* AllocatorDefaultAlloc(size_t size);
void AllocatorDefaultFree(void* p);

} // namespace onnxruntime
69 changes: 63 additions & 6 deletions onnxruntime/core/framework/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
#include "core/common/safeint.h"
#include "core/framework/allocator.h"
#include "core/framework/allocatormgr.h"
#include "core/mlas/inc/mlas.h"
#include "core/framework/utils.h"
#include "core/session/ort_apis.h"
#include <cstdlib>
#include <sstream>

#if defined(USE_MIMALLOC)
#include <mimalloc.h>
#endif

namespace onnxruntime {

// private helper for calculation so SafeInt usage doesn't bleed into the public allocator.h header
Expand All @@ -34,22 +39,73 @@ bool IAllocator::CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, siz
return ok;
}

#if defined(USE_MIMALLOC_ARENA_ALLOCATOR)
void* MiMallocAllocator::Alloc(size_t size) {
return mi_malloc(size);
#ifdef USE_MIMALLOC
// mimalloc-backed default allocation (this variant is compiled when USE_MIMALLOC is defined).
// Returns `size` bytes aligned to MlasGetPreferredBufferAlignment(), or nullptr for a
// zero-byte request. A failed allocation is reported via ORT_THROW_EX(std::bad_alloc).
void* AllocatorDefaultAlloc(size_t size) {
  const size_t buffer_alignment = MlasGetPreferredBufferAlignment();
  if (size == 0) {
    return nullptr;
  }

#if defined(_MSC_VER)
  void* block = mi_malloc_aligned(size, buffer_alignment);
  const bool alloc_failed = (block == nullptr);
#elif defined(_LIBCPP_SGX_CONFIG)
  void* block = mi_memalign(buffer_alignment, size);
  const bool alloc_failed = (block == nullptr);
#else
  // The posix-style entry point reports failure through its return code.
  void* block = nullptr;
  const bool alloc_failed = (mi_posix_memalign(&block, buffer_alignment, size) != 0);
#endif

  if (alloc_failed) {
    ORT_THROW_EX(std::bad_alloc);
  }
  return block;
}

void MiMallocAllocator::Free(void* p) {
// mimalloc-backed default free (this variant is compiled when USE_MIMALLOC is defined).
// Under MSVC the block is released with mi_free_aligned, passing the same preferred
// alignment that the allocation side uses; all other toolchains call mi_free.
void AllocatorDefaultFree(void* p) {
#if !defined(_MSC_VER)
  mi_free(p);
#else
  mi_free_aligned(p, MlasGetPreferredBufferAlignment());
#endif
}

#else
// CRT-backed default allocation (this variant is compiled when USE_MIMALLOC is not defined).
// Returns `size` bytes aligned to MlasGetPreferredBufferAlignment(), or nullptr for a
// zero-byte request. A failed allocation is reported via ORT_THROW_EX(std::bad_alloc).
void* AllocatorDefaultAlloc(size_t size) {
  const size_t alignment = MlasGetPreferredBufferAlignment();
  // size_t is unsigned, so zero is the only "empty" request there can be.
  if (size == 0) return nullptr;
  void* p = nullptr;
  // Test the macro with defined(): _MSC_VER does not exist on other compilers, and
  // this matches the form used by the mimalloc variant of this function.
#if defined(_MSC_VER)
  p = _aligned_malloc(size, alignment);
  if (p == nullptr)
    ORT_THROW_EX(std::bad_alloc);
#elif defined(_LIBCPP_SGX_CONFIG)
  p = memalign(alignment, size);
  if (p == nullptr)
    ORT_THROW_EX(std::bad_alloc);
#else
  // posix_memalign reports failure through its return code, not through p.
  int ret = posix_memalign(&p, alignment, size);
  if (ret != 0)
    ORT_THROW_EX(std::bad_alloc);
#endif
  return p;
}

// CRT-backed default free (this variant is compiled when USE_MIMALLOC is not defined).
// Under MSVC the matching allocation comes from _aligned_malloc, so the block is
// released with _aligned_free; elsewhere plain free() is used.
void AllocatorDefaultFree(void* p) {
  // Test the macro with defined(): _MSC_VER does not exist on other compilers.
#if defined(_MSC_VER)
  _aligned_free(p);
#else
  free(p);
#endif
}

#endif // USE_MIMALLOC

void* CPUAllocator::Alloc(size_t size) {
return utils::DefaultAlloc(size);
return AllocatorDefaultAlloc(size);
}

void CPUAllocator::Free(void* p) {
utils::DefaultFree(p);
AllocatorDefaultFree(p);
}
} // namespace onnxruntime

Expand Down Expand Up @@ -108,3 +164,4 @@ ORT_API_STATUS_IMPL(OrtApis::CompareMemoryInfo, _In_ const OrtMemoryInfo* info1,
*out = (*info1 == *info2) ? 0 : -1;
return nullptr;
}

14 changes: 4 additions & 10 deletions onnxruntime/core/framework/allocatormgr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include "core/framework/allocatormgr.h"
#include "core/framework/bfc_arena.h"
#include "core/framework/mimalloc_allocator.h"
#include "core/common/logging/logging.h"
#include <mutex>
#include <sstream>
Expand All @@ -21,7 +20,7 @@ inline int MakeKey(int id, OrtMemType mem_type) {
} // namespace

AllocatorPtr CreateAllocator(const AllocatorCreationInfo& info) {
auto device_allocator = std::unique_ptr<IAllocator>(info.device_alloc_factory(info.device_id));
auto device_allocator = info.device_alloc_factory(info.device_id);

if (info.use_arena) {
size_t max_mem = info.arena_cfg.max_mem == 0 ? BFCArena::DEFAULT_MAX_MEM : info.arena_cfg.max_mem;
Expand All @@ -48,21 +47,16 @@ AllocatorPtr CreateAllocator(const AllocatorCreationInfo& info) {
return nullptr;
}

#ifdef USE_MIMALLOC_ARENA_ALLOCATOR
return std::shared_ptr<IAllocator>(
std::make_unique<MiMallocAllocator>(max_mem));
#else
return std::shared_ptr<IAllocator>(
return AllocatorPtr(
std::make_unique<BFCArena>(std::move(device_allocator),
max_mem,
arena_extend_str,
initial_chunk_size_bytes,
max_dead_bytes_per_chunk,
initial_growth_chunk_size_bytes));
#endif
} else {
return device_allocator;
}

return AllocatorPtr(std::move(device_allocator));
}

// Update allocator in the provider if already present; ignore if not.
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/bfc_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ class BFCArena : public IAllocator {

AllocationRegion(AllocationRegion&& other) noexcept { Swap(other); }

AllocationRegion& operator=(AllocationRegion&& other) {
// Move assignment: exchanges this region's state with `other` through Swap() and
// returns *this. Declared noexcept, like the move constructor.
AllocationRegion& operator=(AllocationRegion&& other) noexcept {
Swap(other);
return *this;
}
Expand Down
18 changes: 14 additions & 4 deletions onnxruntime/core/framework/error_code.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,23 @@ struct OrtStatus {
#pragma warning(disable : 28196)
#pragma warning(disable : 6387)
#endif

namespace {
// Creates an OrtStatus at the start of a single byte buffer that has room for the
// OrtStatus itself plus `clen` additional message bytes.
// The buffer is obtained with the non-throwing array new, so an out-of-memory
// condition yields nullptr instead of an exception.
inline OrtStatus* NewStatus(size_t clen) {
  const size_t total_bytes = sizeof(OrtStatus) + clen;
  uint8_t* const storage = new (std::nothrow) uint8_t[total_bytes];
  // OOM: nothing to construct, hand the failure back to the caller.
  return storage != nullptr ? new (storage) OrtStatus : nullptr;
}
}  // namespace

// Although this is annotated as never returning NULL, it can in fact return NULL (when allocation fails).
_Check_return_ _Ret_notnull_ OrtStatus* ORT_API_CALL OrtApis::CreateStatus(OrtErrorCode code,
_In_z_ const char* msg) NO_EXCEPTION {
assert(!(code == 0 && msg != nullptr));
SafeInt<size_t> clen(nullptr == msg ? 0 : strnlen(msg, onnxruntime::kMaxStrLen));
OrtStatus* p = reinterpret_cast<OrtStatus*>(::malloc(sizeof(OrtStatus) + clen));
if (p == nullptr) return nullptr; // OOM. What we can do here? abort()?
OrtStatus* p = NewStatus(clen);
if (p == nullptr)
return nullptr;
p->code = code;
memcpy(p->msg, msg, clen);
p->msg[clen] = '\0';
Expand All @@ -37,7 +47,7 @@ _Ret_notnull_ OrtStatus* ToOrtStatus(const Status& st) {
if (st.IsOK())
return nullptr;
SafeInt<size_t> clen(st.ErrorMessage().length());
OrtStatus* p = reinterpret_cast<OrtStatus*>(::malloc(sizeof(OrtStatus) + clen));
OrtStatus* p = NewStatus(clen);
if (p == nullptr)
return nullptr;
p->code = static_cast<OrtErrorCode>(st.Code());
Expand All @@ -57,4 +67,4 @@ ORT_API(const char*, OrtApis::GetErrorMessage, _In_ const OrtStatus* status) {
return status->msg;
}

ORT_API(void, OrtApis::ReleaseStatus, _Frees_ptr_opt_ OrtStatus* value) { ::free(value); }
// Releases a status object: the pointer is cast back to uint8_t* and freed with
// delete[], the counterpart of the uint8_t[] buffer that NewStatus allocates.
ORT_API(void, OrtApis::ReleaseStatus, _Frees_ptr_opt_ OrtStatus* value) { delete[] reinterpret_cast<uint8_t*>(value); }
44 changes: 0 additions & 44 deletions onnxruntime/core/framework/mimalloc_allocator.cc

This file was deleted.

31 changes: 0 additions & 31 deletions onnxruntime/core/framework/mimalloc_allocator.h

This file was deleted.

Loading

0 comments on commit a7f649d

Please sign in to comment.