Implement GLSL_EXT_shader_atomic_float2

qingyuanzNV · Jun 16, 2021 · d352577 · d352577
1 parent a23e143
commit d352577
Show file tree

Hide file tree

Showing 9 changed files with 319 additions and 24 deletions.
diff --git a/SPIRV/GLSL.ext.EXT.h b/SPIRV/GLSL.ext.EXT.h
@@ -36,6 +36,8 @@ static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fu
 static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density";
 static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation";
 static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add";
+static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add";
+static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max";
 static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64";
 
 #endif  // #ifndef GLSLextEXT_H
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
@@ -6900,13 +6900,17 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
     case glslang::EOpImageAtomicAdd:
     case glslang::EOpAtomicCounterAdd:
         opCode = spv::OpAtomicIAdd;
-        if (typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
+        if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
             opCode = spv::OpAtomicFAddEXT;
             builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_add);
-            if (typeProxy == glslang::EbtFloat)
+            if (typeProxy == glslang::EbtFloat16) {
+                builder.addExtension(spv::E_SPV_EXT_shader_atomic_float16_add);
+                builder.addCapability(spv::CapabilityAtomicFloat16AddEXT);
+            } else if (typeProxy == glslang::EbtFloat) {
                 builder.addCapability(spv::CapabilityAtomicFloat32AddEXT);
-            else
+            } else {
                 builder.addCapability(spv::CapabilityAtomicFloat64AddEXT);
+            }
         }
         break;
     case glslang::EOpAtomicSubtract:
@@ -6916,14 +6920,38 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
     case glslang::EOpAtomicMin:
     case glslang::EOpImageAtomicMin:
     case glslang::EOpAtomicCounterMin:
-        opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ?
-            spv::OpAtomicUMin : spv::OpAtomicSMin;
+        if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
+            opCode = spv::OpAtomicFMinEXT;
+            builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max);
+            if (typeProxy == glslang::EbtFloat16)
+                builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT);
+            else if (typeProxy == glslang::EbtFloat)
+                builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT);
+            else
+                builder.addCapability(spv::CapabilityAtomicFloat64MinMaxEXT);
+        } else if (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) {
+            opCode = spv::OpAtomicUMin;
+        } else {
+            opCode = spv::OpAtomicSMin;
+        }
         break;
     case glslang::EOpAtomicMax:
     case glslang::EOpImageAtomicMax:
     case glslang::EOpAtomicCounterMax:
-        opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ?
-            spv::OpAtomicUMax : spv::OpAtomicSMax;
+        if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
+            opCode = spv::OpAtomicFMaxEXT;
+            builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max);
+            if (typeProxy == glslang::EbtFloat16)
+                builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT);
+            else if (typeProxy == glslang::EbtFloat)
+                builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT);
+            else
+                builder.addCapability(spv::CapabilityAtomicFloat64MinMaxEXT);
+        } else if (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) {
+            opCode = spv::OpAtomicUMax;
+        } else {
+            opCode = spv::OpAtomicSMax;
+        }
         break;
     case glslang::EOpAtomicAnd:
     case glslang::EOpImageAtomicAnd:

diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp
@@ -966,8 +966,12 @@ const char* CapabilityString(int info)
 
     case CapabilityIntegerFunctions2INTEL:              return "CapabilityIntegerFunctions2INTEL";
 
+    case CapabilityAtomicFloat16AddEXT:                     return "AtomicFloat16AddEXT";
     case CapabilityAtomicFloat32AddEXT:                     return "AtomicFloat32AddEXT";
     case CapabilityAtomicFloat64AddEXT:                     return "AtomicFloat64AddEXT";
+    case CapabilityAtomicFloat16MinMaxEXT:                  return "AtomicFloat16MinMaxEXT";
+    case CapabilityAtomicFloat32MinMaxEXT:                  return "AtomicFloat32MinMaxEXT";
+    case CapabilityAtomicFloat64MinMaxEXT:                  return "AtomicFloat64MinMaxEXT";
 
     case CapabilityWorkgroupMemoryExplicitLayoutKHR:            return "CapabilityWorkgroupMemoryExplicitLayoutKHR";
     case CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR:  return "CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR";
@@ -1352,6 +1356,8 @@ const char* OpcodeString(int op)
     case 4432: return "OpSubgroupReadInvocationKHR";
 
     case OpAtomicFAddEXT: return "OpAtomicFAddEXT";
+    case OpAtomicFMinEXT: return "OpAtomicFMinEXT";
+    case OpAtomicFMaxEXT: return "OpAtomicFMaxEXT";
 
     case 5000: return "OpGroupIAddNonUniformAMD";
     case 5001: return "OpGroupFAddNonUniformAMD";
@@ -2342,6 +2348,16 @@ void Parameterize()
     InstructionDesc[OpAtomicSMax].operands.push(OperandMemorySemantics, "'Semantics'");
     InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Value'");
 
+    InstructionDesc[OpAtomicFMinEXT].operands.push(OperandId, "'Pointer'");
+    InstructionDesc[OpAtomicFMinEXT].operands.push(OperandScope, "'Scope'");
+    InstructionDesc[OpAtomicFMinEXT].operands.push(OperandMemorySemantics, "'Semantics'");
+    InstructionDesc[OpAtomicFMinEXT].operands.push(OperandId, "'Value'");
+
+    InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandId, "'Pointer'");
+    InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandScope, "'Scope'");
+    InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandMemorySemantics, "'Semantics'");
+    InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandId, "'Value'");
+
     InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Pointer'");
     InstructionDesc[OpAtomicAnd].operands.push(OperandScope, "'Scope'");
     InstructionDesc[OpAtomicAnd].operands.push(OperandMemorySemantics, "'Semantics'");

diff --git a/SPIRV/spirv.hpp b/SPIRV/spirv.hpp
@@ -410,6 +410,7 @@ enum FPRoundingMode {
 enum LinkageType {
     LinkageTypeExport = 0,
     LinkageTypeImport = 1,
+    LinkageTypeLinkOnceODR = 2,
     LinkageTypeMax = 0x7fffffff,
 };
 
@@ -1011,8 +1012,12 @@ enum Capability {
     CapabilityFunctionPointersINTEL = 5603,
     CapabilityIndirectReferencesINTEL = 5604,
     CapabilityAsmINTEL = 5606,
+    CapabilityAtomicFloat32MinMaxEXT = 5612,
+    CapabilityAtomicFloat64MinMaxEXT = 5613,
+    CapabilityAtomicFloat16MinMaxEXT = 5616,
     CapabilityVectorComputeINTEL = 5617,
     CapabilityVectorAnyINTEL = 5619,
+    CapabilityExpectAssumeKHR = 5629,
     CapabilitySubgroupAvcMotionEstimationINTEL = 5696,
     CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
     CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
@@ -1036,6 +1041,7 @@ enum Capability {
     CapabilityAtomicFloat32AddEXT = 6033,
     CapabilityAtomicFloat64AddEXT = 6034,
     CapabilityLongConstantCompositeINTEL = 6089,
+    CapabilityAtomicFloat16AddEXT = 6095,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -1103,15 +1109,15 @@ enum FragmentShadingRateMask {
 };
 
 enum FPDenormMode {
-  FPDenormModePreserve = 0,
-  FPDenormModeFlushToZero = 1,
-  FPDenormModeMax = 0x7fffffff,
+    FPDenormModePreserve = 0,
+    FPDenormModeFlushToZero = 1,
+    FPDenormModeMax = 0x7fffffff,
 };
 
 enum FPOperationMode {
-  FPOperationModeIEEE = 0,
-  FPOperationModeALT = 1,
-  FPOperationModeMax = 0x7fffffff,
+    FPOperationModeIEEE = 0,
+    FPOperationModeALT = 1,
+    FPOperationModeMax = 0x7fffffff,
 };
 
 enum Op {
@@ -1538,6 +1544,10 @@ enum Op {
     OpAsmTargetINTEL = 5609,
     OpAsmINTEL = 5610,
     OpAsmCallINTEL = 5611,
+    OpAtomicFMinEXT = 5614,
+    OpAtomicFMaxEXT = 5615,
+    OpAssumeTrueKHR = 5630,
+    OpExpectKHR = 5631,
     OpDecorateString = 5632,
     OpDecorateStringGOOGLE = 5632,
     OpMemberDecorateString = 5633,
@@ -2120,6 +2130,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
     case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break;
     case OpAsmINTEL: *hasResult = true; *hasResultType = true; break;
     case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break;
+    case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break;
+    case OpExpectKHR: *hasResult = true; *hasResultType = true; break;
     case OpDecorateString: *hasResult = false; *hasResultType = false; break;
     case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
     case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;

diff --git a/Test/spv.atomicFloat2.comp b/Test/spv.atomicFloat2.comp
@@ -0,0 +1,179 @@
+#version 450 core
+
+#extension GL_KHR_memory_scope_semantics : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
+#extension GL_EXT_shader_atomic_float2: enable
+#pragma use_vulkan_memory_model
+
+layout(local_size_x = 16, local_size_y = 16) in;
+
+layout(binding = 0) buffer Buffer
+{
+    float16_t datah;
+    float dataf;
+    double datad;
+} buf;
+
+shared float16_t    atomh;
+shared float        atomf;
+shared double       atomd;
+
+layout(binding = 0, r32f) volatile coherent uniform image1D        fimage1D;
+layout(binding = 1, r32f) volatile coherent uniform image1DArray   fimage1DArray;
+layout(binding = 2, r32f) volatile coherent uniform image2D        fimage2D;
+layout(binding = 3, r32f) volatile coherent uniform image2DArray   fimage2DArray;
+layout(binding = 4, r32f) volatile coherent uniform image2DRect    fimage2DRect;
+layout(binding = 5, r32f) volatile coherent uniform imageCube      fimageCube;
+layout(binding = 6, r32f) volatile coherent uniform imageCubeArray fimageCubeArray;
+layout(binding = 9, r32f) volatile coherent uniform image3D        fimage3D;
+
+void main()
+{
+    //atomicAdd
+    float16_t resulth = float16_t(0.0);
+    resulth = atomicAdd(atomh, float16_t(3.0));
+    resulth = atomicAdd(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resulth = atomicAdd(buf.datah, float16_t(3.0));
+    resulth = atomicAdd(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    //atomicMin
+    resulth = atomicMin(atomh, float16_t(3.0));
+    resulth = atomicMin(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resulth = atomicMin(buf.datah, float16_t(3.0));
+    resulth = atomicMin(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    float resultf = 0.0;
+    resultf = atomicMin(atomf, 3.0);
+    resultf = atomicMin(atomf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resultf = atomicMin(buf.dataf, 3.0);
+    resultf = atomicMin(buf.dataf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    double resultd = 0.0;
+    resultd = atomicMin(atomd, 3.0);
+    resultd = atomicMin(atomd, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resultd = atomicMin(buf.datad, 3.0);
+    resultd = atomicMin(buf.datad, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    //atomicMax
+    resulth = atomicMax(atomh, float16_t(3.0));
+    resulth = atomicMax(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resulth = atomicMax(buf.datah, float16_t(3.0));
+    resulth = atomicMax(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    resultf = atomicMax(atomf, 3.0);
+    resultf = atomicMax(atomf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resultf = atomicMax(buf.dataf, 3.0);
+    resultf = atomicMax(buf.dataf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    resultd = atomicMax(atomd, 3.0);
+    resultd = atomicMax(atomd, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+    resultd = atomicMax(buf.datad, 3.0);
+    resultd = atomicMax(buf.datad, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
+
+    //atomicExchange
+    resulth = atomicExchange(buf.datah, resulth);
+    buf.datah += resulth;
+    resulth = atomicExchange(buf.datah, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
+    buf.datah += resulth;
+    resulth = atomicExchange(atomh, resulth);
+    buf.datah += resulth;
+    resulth = atomicExchange(atomh, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
+    buf.datah += resulth;
+
+    //atomic load/store
+    resulth = atomicLoad(buf.datah, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
+    atomicStore(buf.datah, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
+    buf.datah += resulth;
+
+    resulth = atomicLoad(atomh, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
+    atomicStore(atomh, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
+    buf.datah += resulth;
+
+    // image atomics on 1D:
+    atomf = imageAtomicMin(fimage1D, int(0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimage1D, int(1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimage1D, int(0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimage1D, int(1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on 1D Array:
+    atomf = imageAtomicMin(fimage1DArray, ivec2(0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimage1DArray, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimage1DArray, ivec2(0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimage1DArray, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on 2D:
+    atomf = imageAtomicMin(fimage2D, ivec2(0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimage2D, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimage2D, ivec2(0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimage2D, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on 2D Rect:
+    atomf = imageAtomicMin(fimage2DRect, ivec2(0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimage2DRect, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimage2DRect, ivec2(0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimage2DRect, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on 2D Array:
+    atomf = imageAtomicMin(fimage2DArray, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimage2DArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimage2DArray, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimage2DArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on Cube:
+    atomf = imageAtomicMin(fimageCube, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimageCube, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimageCube, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimageCube, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on Cube Array:
+    atomf = imageAtomicMin(fimageCubeArray, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimageCubeArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimageCubeArray, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimageCubeArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    // image atomics on 3D:
+    atomf = imageAtomicMin(fimage3D, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMin(fimage3D, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+
+    atomf = imageAtomicMax(fimage3D, ivec3(0,0,0), 2.0);
+    buf.dataf += atomf;
+    atomf = imageAtomicMax(fimage3D, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
+    buf.dataf += atomf;
+}