Skip to content

Commit

Permalink
AMDGPU: Support buffer_atomic_pk_add_bf16 for gfx950 (#117599)
Browse files Browse the repository at this point in the history
Co-authored-by: Sirish Pande <Sirish.Pande@amd.com>
  • Loading branch information
arsenm and srpande authored Nov 26, 2024
1 parent 716364e commit 7fc71f7
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 10 deletions.
6 changes: 3 additions & 3 deletions clang/test/CodeGenOpenCL/amdgpu-features.cl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Expand All @@ -109,8 +109,8 @@
// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size"="true"

// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"

Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;

// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+.
// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+.
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;

class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
Expand Down Expand Up @@ -1527,7 +1527,7 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
ImmArg<ArgIndex<6>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;

// gfx908 intrinsic
// gfx908 intrinsic. Supports v2bf16 on gfx12+ and gfx950
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1593,7 +1593,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureFP4ConversionScaleInsts,
FeatureFP6BF6ConversionScaleInsts,
FeatureDot12Insts,
FeatureDot13Insts
FeatureDot13Insts,
FeatureAtomicBufferPkAddBF16Inst
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3296,6 +3296,8 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>;


defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_vi <0x52>;

defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;

let SubtargetPredicate = isGFX90APlus in {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/TargetParser/TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-ds-pk-add-16-insts"] = true;
Features["atomic-flat-pk-add-16-insts"] = true;
Features["atomic-buffer-global-pk-add-f16-insts"] = true;
Features["atomic-buffer-pk-add-bf16-inst"] = true;
Features["atomic-global-pk-add-bf16-inst"] = true;
Features["16-bit-insts"] = true;
Features["dpp"] = true;
Expand Down Expand Up @@ -479,6 +480,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["ashr-pk-insts"] = true;
Features["dot12-insts"] = true;
Features["dot13-insts"] = true;
Features["atomic-buffer-pk-add-bf16-inst"] = true;
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-SDAG
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-GISEL

declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32)

define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen sc0
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
store <2 x bfloat> %orig, ptr null
ret float 1.0
}

define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2
; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen
; GFX950-SDAG-NEXT: s_endpgm
;
; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1
; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen
; GFX950-GISEL-NEXT: s_endpgm
%orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX950-SDAG-NEXT: s_endpgm
;
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX950-GISEL-NEXT: s_endpgm
%ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}

define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0
; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
store <2 x bfloat> %orig, ptr null
ret float 1.0
}
8 changes: 4 additions & 4 deletions llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s
Original file line number Diff line number Diff line change
Expand Up @@ -2636,16 +2636,16 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 th:TH_ATOMIC_CASCA
// GFX12: encoding: [0x03,0x80,0x16,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f]

buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:8388607
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:8388607
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:8388607
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:8388607
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 glc
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
Expand Down
60 changes: 60 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -1089,3 +1089,63 @@ v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], 22
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02]
v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03]
buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03]
buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03]
buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095

// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7]
buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -341,3 +341,15 @@ v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 clamp div:2

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 slc

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc
Loading

0 comments on commit 7fc71f7

Please sign in to comment.