Skip to content

Commit

Permalink
POWER10: Add optimized dgemm kernel (microsoft#9652)
Browse files Browse the repository at this point in the history
* POWER10: Add optimized dgemm kernel

This patch uses the POWER10 Matrix-Multiply Assist (MMA) feature to
add a new DGEMM kernel.

* Indentation update

Co-authored-by: Rajalakshmi Srinivasaraghavan <rajis@linux.ibm.com>
  • Loading branch information
RajalakshmiSR and Rajalakshmi Srinivasaraghavan authored Nov 23, 2021
1 parent bf5e9a5 commit 8564fc1
Show file tree
Hide file tree
Showing 8 changed files with 799 additions and 599 deletions.
5 changes: 4 additions & 1 deletion cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ else()
${MLAS_SRC_DIR}/dgemm.cpp
${MLAS_SRC_DIR}/power/DgemmKernelPower.cpp
)
set_source_files_properties(${MLAS_SRC_DIR}/power/SgemmKernelPower.cpp PROPERTIES COMPILE_FLAGS "-DSINGLE")
check_cxx_compiler_flag("-mcpu=power10" HAS_POWER10)
if(HAS_POWER10)
set(CMAKE_REQUIRED_FLAGS "-mcpu=power10")
Expand Down Expand Up @@ -318,8 +319,10 @@ else()
endif()
set(mlas_platform_srcs_power10
${MLAS_SRC_DIR}/power/SgemmKernelPOWER10.cpp
${MLAS_SRC_DIR}/power/DgemmKernelPOWER10.cpp
)
set_source_files_properties(${mlas_platform_srcs_power10} PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10")
set_source_files_properties(${MLAS_SRC_DIR}/power/SgemmKernelPOWER10.cpp PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10 -DSINGLE")
set_source_files_properties(${MLAS_SRC_DIR}/power/DgemmKernelPOWER10.cpp PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10")
set(mlas_platform_srcs
${mlas_platform_srcs}
${mlas_platform_srcs_power10}
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/mlas/lib/mlasi.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ extern "C" {
MLAS_GEMM_FLOAT_KERNEL MlasSgemmKernel;
MLAS_GEMM_FLOAT_KERNEL MlasSgemmKernelPOWER10;
MLAS_GEMM_DOUBLE_KERNEL MlasDgemmKernel;
MLAS_GEMM_DOUBLE_KERNEL MlasDgemmKernelPOWER10;
#else
MLAS_GEMM_FLOAT_KERNEL MlasSgemmKernelZero;
MLAS_GEMM_FLOAT_KERNEL MlasSgemmKernelAdd;
Expand Down Expand Up @@ -1886,7 +1887,7 @@ MlasStoreAlignedFloat64x2(double* Buffer, MLAS_FLOAT64X2 Vector)
#if defined(MLAS_SSE2_INTRINSICS)
_mm_store_pd(Buffer, Vector);
#elif defined(MLAS_VSX_INTRINSICS)
vec_st(Vector, 0, Buffer);
*((MLAS_FLOAT64X2*)Buffer) = Vector;
#endif
}

Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ Return Value:
bool HasP10Instructions = ((hwcap2 & PPC_FEATURE2_MMA) && (hwcap2 & PPC_FEATURE2_ARCH_3_1));
if (HasP10Instructions) {
this->GemmFloatKernel = MlasSgemmKernelPOWER10;
this->GemmDoubleKernel = MlasDgemmKernelPOWER10;
}
#endif
#endif
Expand Down
Loading

0 comments on commit 8564fc1

Please sign in to comment.