forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AMDGPU: Support buffer_atomic_pk_add_bf16 for gfx950 (#117599)
Co-authored-by: Sirish Pande <Sirish.Pande@amd.com>
- Loading branch information
Showing 10 changed files with 224 additions and 10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-SDAG | ||
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-GISEL | ||
|
||
; Intrinsics exercised by this test: the struct and raw buffer forms of atomic fadd on <2 x bfloat>. | ||
; NOTE(review): the struct declaration marks its last i32 as immarg, while the raw declaration does not and also names its first two parameters -- confirm the difference is intentional. | ||
declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg) | ||
declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32) | ||
|
||
; Struct-buffer atomic fadd on <2 x bfloat> whose result is used: the value returned by the intrinsic is stored to ptr null and the function returns 1.0. | ||
; Both check prefixes expect a single buffer_atomic_pk_add_bf16 with the idxen, offen and sc0 modifiers. | ||
define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { | ||
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret: | ||
; GFX950-SDAG: ; %bb.0: | ||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 | ||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 | ||
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0 | ||
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0 | ||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) | ||
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0 | ||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 | ||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; GFX950-SDAG-NEXT: ; return to shader part epilog | ||
; | ||
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret: | ||
; GFX950-GISEL: ; %bb.0: | ||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 | ||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 | ||
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen sc0 | ||
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 | ||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) | ||
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0 | ||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 | ||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; GFX950-GISEL-NEXT: ; return to shader part epilog | ||
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) | ||
; Store the value returned by the atomic so that the result has a use. | ||
store <2 x bfloat> %orig, ptr null | ||
ret float 1.0 | ||
} | ||
|
||
; Struct-buffer atomic fadd on <2 x bfloat> whose result (%orig) is never used; the function returns void. | ||
; Both check prefixes expect buffer_atomic_pk_add_bf16 with idxen and offen but without sc0, followed by s_endpgm. | ||
define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { | ||
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret: | ||
; GFX950-SDAG: ; %bb.0: | ||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 | ||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 | ||
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen | ||
; GFX950-SDAG-NEXT: s_endpgm | ||
; | ||
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret: | ||
; GFX950-GISEL: ; %bb.0: | ||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 | ||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 | ||
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen | ||
; GFX950-GISEL-NEXT: s_endpgm | ||
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) | ||
ret void | ||
} | ||
|
||
; Raw-buffer atomic fadd on <2 x bfloat> whose result (%ret) is never used; the function returns void. | ||
; Both check prefixes expect buffer_atomic_pk_add_bf16 with offen only (no idxen, no sc0), followed by s_endpgm. | ||
define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { | ||
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16: | ||
; GFX950-SDAG: ; %bb.0: | ||
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen | ||
; GFX950-SDAG-NEXT: s_endpgm | ||
; | ||
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16: | ||
; GFX950-GISEL: ; %bb.0: | ||
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen | ||
; GFX950-GISEL-NEXT: s_endpgm | ||
%ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) | ||
ret void | ||
} | ||
|
||
; Raw-buffer atomic fadd on <2 x bfloat> whose result is used: the value returned by the intrinsic is stored to ptr null and the function returns 1.0. | ||
; Both check prefixes expect buffer_atomic_pk_add_bf16 with the offen and sc0 modifiers. | ||
define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { | ||
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret: | ||
; GFX950-SDAG: ; %bb.0: | ||
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0 | ||
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0 | ||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) | ||
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0 | ||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 | ||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; GFX950-SDAG-NEXT: ; return to shader part epilog | ||
; | ||
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret: | ||
; GFX950-GISEL: ; %bb.0: | ||
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0 | ||
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 | ||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) | ||
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0 | ||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 | ||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; GFX950-GISEL-NEXT: ; return to shader part epilog | ||
%orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) | ||
; Store the value returned by the atomic so that the result has a use. | ||
store <2 x bfloat> %orig, ptr null | ||
ret float 1.0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.