Skip to content

Commit

Permalink
Implement GLSL_EXT_shader_atomic_float2
Browse files Browse the repository at this point in the history
  • Loading branch information
gfxstrand authored and cmarcelo committed Jun 16, 2021
1 parent a23e143 commit d352577
Show file tree
Hide file tree
Showing 9 changed files with 319 additions and 24 deletions.
2 changes: 2 additions & 0 deletions SPIRV/GLSL.ext.EXT.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fu
static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density";
static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation";
static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add";
static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add";
static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max";
static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64";

#endif // #ifndef GLSLextEXT_H
42 changes: 35 additions & 7 deletions SPIRV/GlslangToSpv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6900,13 +6900,17 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
case glslang::EOpImageAtomicAdd:
case glslang::EOpAtomicCounterAdd:
opCode = spv::OpAtomicIAdd;
if (typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
opCode = spv::OpAtomicFAddEXT;
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_add);
if (typeProxy == glslang::EbtFloat)
if (typeProxy == glslang::EbtFloat16) {
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float16_add);
builder.addCapability(spv::CapabilityAtomicFloat16AddEXT);
} else if (typeProxy == glslang::EbtFloat) {
builder.addCapability(spv::CapabilityAtomicFloat32AddEXT);
else
} else {
builder.addCapability(spv::CapabilityAtomicFloat64AddEXT);
}
}
break;
case glslang::EOpAtomicSubtract:
Expand All @@ -6916,14 +6920,38 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
case glslang::EOpAtomicMin:
case glslang::EOpImageAtomicMin:
case glslang::EOpAtomicCounterMin:
opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ?
spv::OpAtomicUMin : spv::OpAtomicSMin;
if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
opCode = spv::OpAtomicFMinEXT;
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max);
if (typeProxy == glslang::EbtFloat16)
builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT);
else if (typeProxy == glslang::EbtFloat)
builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT);
else
builder.addCapability(spv::CapabilityAtomicFloat64MinMaxEXT);
} else if (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) {
opCode = spv::OpAtomicUMin;
} else {
opCode = spv::OpAtomicSMin;
}
break;
case glslang::EOpAtomicMax:
case glslang::EOpImageAtomicMax:
case glslang::EOpAtomicCounterMax:
opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ?
spv::OpAtomicUMax : spv::OpAtomicSMax;
if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
opCode = spv::OpAtomicFMaxEXT;
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max);
if (typeProxy == glslang::EbtFloat16)
builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT);
else if (typeProxy == glslang::EbtFloat)
builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT);
else
builder.addCapability(spv::CapabilityAtomicFloat64MinMaxEXT);
} else if (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) {
opCode = spv::OpAtomicUMax;
} else {
opCode = spv::OpAtomicSMax;
}
break;
case glslang::EOpAtomicAnd:
case glslang::EOpImageAtomicAnd:
Expand Down
16 changes: 16 additions & 0 deletions SPIRV/doc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -966,8 +966,12 @@ const char* CapabilityString(int info)

case CapabilityIntegerFunctions2INTEL: return "CapabilityIntegerFunctions2INTEL";

case CapabilityAtomicFloat16AddEXT: return "AtomicFloat16AddEXT";
case CapabilityAtomicFloat32AddEXT: return "AtomicFloat32AddEXT";
case CapabilityAtomicFloat64AddEXT: return "AtomicFloat64AddEXT";
case CapabilityAtomicFloat16MinMaxEXT: return "AtomicFloat16MinMaxEXT";
case CapabilityAtomicFloat32MinMaxEXT: return "AtomicFloat32MinMaxEXT";
case CapabilityAtomicFloat64MinMaxEXT: return "AtomicFloat64MinMaxEXT";

case CapabilityWorkgroupMemoryExplicitLayoutKHR: return "CapabilityWorkgroupMemoryExplicitLayoutKHR";
case CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR: return "CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR";
Expand Down Expand Up @@ -1352,6 +1356,8 @@ const char* OpcodeString(int op)
case 4432: return "OpSubgroupReadInvocationKHR";

case OpAtomicFAddEXT: return "OpAtomicFAddEXT";
case OpAtomicFMinEXT: return "OpAtomicFMinEXT";
case OpAtomicFMaxEXT: return "OpAtomicFMaxEXT";

case 5000: return "OpGroupIAddNonUniformAMD";
case 5001: return "OpGroupFAddNonUniformAMD";
Expand Down Expand Up @@ -2342,6 +2348,16 @@ void Parameterize()
InstructionDesc[OpAtomicSMax].operands.push(OperandMemorySemantics, "'Semantics'");
InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Value'");

InstructionDesc[OpAtomicFMinEXT].operands.push(OperandId, "'Pointer'");
InstructionDesc[OpAtomicFMinEXT].operands.push(OperandScope, "'Scope'");
InstructionDesc[OpAtomicFMinEXT].operands.push(OperandMemorySemantics, "'Semantics'");
InstructionDesc[OpAtomicFMinEXT].operands.push(OperandId, "'Value'");

InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandId, "'Pointer'");
InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandScope, "'Scope'");
InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandMemorySemantics, "'Semantics'");
InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandId, "'Value'");

InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Pointer'");
InstructionDesc[OpAtomicAnd].operands.push(OperandScope, "'Scope'");
InstructionDesc[OpAtomicAnd].operands.push(OperandMemorySemantics, "'Semantics'");
Expand Down
26 changes: 20 additions & 6 deletions SPIRV/spirv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ enum FPRoundingMode {
enum LinkageType {
LinkageTypeExport = 0,
LinkageTypeImport = 1,
LinkageTypeLinkOnceODR = 2,
LinkageTypeMax = 0x7fffffff,
};

Expand Down Expand Up @@ -1011,8 +1012,12 @@ enum Capability {
CapabilityFunctionPointersINTEL = 5603,
CapabilityIndirectReferencesINTEL = 5604,
CapabilityAsmINTEL = 5606,
CapabilityAtomicFloat32MinMaxEXT = 5612,
CapabilityAtomicFloat64MinMaxEXT = 5613,
CapabilityAtomicFloat16MinMaxEXT = 5616,
CapabilityVectorComputeINTEL = 5617,
CapabilityVectorAnyINTEL = 5619,
CapabilityExpectAssumeKHR = 5629,
CapabilitySubgroupAvcMotionEstimationINTEL = 5696,
CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
Expand All @@ -1036,6 +1041,7 @@ enum Capability {
CapabilityAtomicFloat32AddEXT = 6033,
CapabilityAtomicFloat64AddEXT = 6034,
CapabilityLongConstantCompositeINTEL = 6089,
CapabilityAtomicFloat16AddEXT = 6095,
CapabilityMax = 0x7fffffff,
};

Expand Down Expand Up @@ -1103,15 +1109,15 @@ enum FragmentShadingRateMask {
};

enum FPDenormMode {
FPDenormModePreserve = 0,
FPDenormModeFlushToZero = 1,
FPDenormModeMax = 0x7fffffff,
FPDenormModePreserve = 0,
FPDenormModeFlushToZero = 1,
FPDenormModeMax = 0x7fffffff,
};

enum FPOperationMode {
FPOperationModeIEEE = 0,
FPOperationModeALT = 1,
FPOperationModeMax = 0x7fffffff,
FPOperationModeIEEE = 0,
FPOperationModeALT = 1,
FPOperationModeMax = 0x7fffffff,
};

enum Op {
Expand Down Expand Up @@ -1538,6 +1544,10 @@ enum Op {
OpAsmTargetINTEL = 5609,
OpAsmINTEL = 5610,
OpAsmCallINTEL = 5611,
OpAtomicFMinEXT = 5614,
OpAtomicFMaxEXT = 5615,
OpAssumeTrueKHR = 5630,
OpExpectKHR = 5631,
OpDecorateString = 5632,
OpDecorateStringGOOGLE = 5632,
OpMemberDecorateString = 5633,
Expand Down Expand Up @@ -2120,6 +2130,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break;
case OpAsmINTEL: *hasResult = true; *hasResultType = true; break;
case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break;
case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break;
case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break;
case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break;
case OpExpectKHR: *hasResult = true; *hasResultType = true; break;
case OpDecorateString: *hasResult = false; *hasResultType = false; break;
case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;
Expand Down
179 changes: 179 additions & 0 deletions Test/spv.atomicFloat2.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#version 450 core

#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#extension GL_EXT_shader_atomic_float2: enable
#pragma use_vulkan_memory_model

layout(local_size_x = 16, local_size_y = 16) in;

layout(binding = 0) buffer Buffer
{
float16_t datah;
float dataf;
double datad;
} buf;

shared float16_t atomh;
shared float atomf;
shared double atomd;

layout(binding = 0, r32f) volatile coherent uniform image1D fimage1D;
layout(binding = 1, r32f) volatile coherent uniform image1DArray fimage1DArray;
layout(binding = 2, r32f) volatile coherent uniform image2D fimage2D;
layout(binding = 3, r32f) volatile coherent uniform image2DArray fimage2DArray;
layout(binding = 4, r32f) volatile coherent uniform image2DRect fimage2DRect;
layout(binding = 5, r32f) volatile coherent uniform imageCube fimageCube;
layout(binding = 6, r32f) volatile coherent uniform imageCubeArray fimageCubeArray;
layout(binding = 9, r32f) volatile coherent uniform image3D fimage3D;

// Test entry point. Exercises the atomic built-ins on the float16/float/double
// shared variables (atomh/atomf/atomd) and buffer members (buf.datah/dataf/datad),
// then imageAtomicMin/imageAtomicMax on every r32f image declared above.
// atomicAdd/Min/Max/Exchange and the image atomics are each called in a short
// form and with explicit scope, storage-semantics and memory-semantics
// arguments; atomicLoad/atomicStore appear only in the explicit form.
// NOTE(review): some explicit calls pair a variable with the storage semantics
// of the other storage class (e.g. atomh with gl_StorageSemanticsBuffer,
// buf.datah with gl_StorageSemanticsShared) -- presumably irrelevant for a
// code-generation test; confirm.
void main()
{
// atomicAdd: float16 only, on the shared variable and on the buffer member.
float16_t resulth = float16_t(0.0);
resulth = atomicAdd(atomh, float16_t(3.0));
resulth = atomicAdd(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resulth = atomicAdd(buf.datah, float16_t(3.0));
resulth = atomicAdd(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

// atomicMin: float16, float and double, each on the shared variable and on
// the buffer member.
resulth = atomicMin(atomh, float16_t(3.0));
resulth = atomicMin(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resulth = atomicMin(buf.datah, float16_t(3.0));
resulth = atomicMin(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

float resultf = 0.0;
resultf = atomicMin(atomf, 3.0);
resultf = atomicMin(atomf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resultf = atomicMin(buf.dataf, 3.0);
resultf = atomicMin(buf.dataf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

double resultd = 0.0;
resultd = atomicMin(atomd, 3.0);
resultd = atomicMin(atomd, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resultd = atomicMin(buf.datad, 3.0);
resultd = atomicMin(buf.datad, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

// atomicMax: same float16 / float / double coverage as atomicMin above.
resulth = atomicMax(atomh, float16_t(3.0));
resulth = atomicMax(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resulth = atomicMax(buf.datah, float16_t(3.0));
resulth = atomicMax(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

resultf = atomicMax(atomf, 3.0);
resultf = atomicMax(atomf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resultf = atomicMax(buf.dataf, 3.0);
resultf = atomicMax(buf.dataf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

resultd = atomicMax(atomd, 3.0);
resultd = atomicMax(atomd, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);
resultd = atomicMax(buf.datad, 3.0);
resultd = atomicMax(buf.datad, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed);

// atomicExchange: float16 only, first on the buffer member and then on the
// shared variable. Each returned value is added into buf.datah.
resulth = atomicExchange(buf.datah, resulth);
buf.datah += resulth;
resulth = atomicExchange(buf.datah, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
buf.datah += resulth;
resulth = atomicExchange(atomh, resulth);
buf.datah += resulth;
resulth = atomicExchange(atomh, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
buf.datah += resulth;

// atomicLoad / atomicStore: float16 only, explicit-argument form, on the
// buffer member and then on the shared variable.
resulth = atomicLoad(buf.datah, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
atomicStore(buf.datah, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
buf.datah += resulth;

resulth = atomicLoad(atomh, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
atomicStore(atomh, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed);
buf.datah += resulth;

// Image atomics. Every section below follows the same pattern: imageAtomicMin
// then imageAtomicMax, each in short and explicit form; the result is written
// to the shared variable atomf and then accumulated into buf.dataf.

// image atomics on 1D (int coordinate):
atomf = imageAtomicMin(fimage1D, int(0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimage1D, int(1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimage1D, int(0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimage1D, int(1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on 1D Array (ivec2 coordinate):
atomf = imageAtomicMin(fimage1DArray, ivec2(0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimage1DArray, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimage1DArray, ivec2(0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimage1DArray, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on 2D (ivec2 coordinate):
atomf = imageAtomicMin(fimage2D, ivec2(0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimage2D, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimage2D, ivec2(0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimage2D, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on 2D Rect (ivec2 coordinate):
atomf = imageAtomicMin(fimage2DRect, ivec2(0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimage2DRect, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimage2DRect, ivec2(0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimage2DRect, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on 2D Array (ivec3 coordinate):
atomf = imageAtomicMin(fimage2DArray, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimage2DArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimage2DArray, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimage2DArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on Cube (ivec3 coordinate):
atomf = imageAtomicMin(fimageCube, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimageCube, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimageCube, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimageCube, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on Cube Array (ivec3 coordinate):
atomf = imageAtomicMin(fimageCubeArray, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimageCubeArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimageCubeArray, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimageCubeArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

// image atomics on 3D (ivec3 coordinate):
atomf = imageAtomicMin(fimage3D, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMin(fimage3D, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;

atomf = imageAtomicMax(fimage3D, ivec3(0,0,0), 2.0);
buf.dataf += atomf;
atomf = imageAtomicMax(fimage3D, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed);
buf.dataf += atomf;
}
Loading

0 comments on commit d352577

Please sign in to comment.