Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Readback stencil buffer for debugger on GLES #16198

Merged
merged 4 commits into from
Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Common/GPU/OpenGL/GLFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,8 @@ void CheckGLExtensions() {
}
if (gl_extensions.VersionGEThan(4, 3)) {
gl_extensions.ARB_copy_image = true;
gl_extensions.ARB_stencil_texturing = true;
// ARB_explicit_uniform_location = true;
// ARB_stencil_texturing = true;
// ARB_texture_view = true;
// ARB_vertex_attrib_binding = true;
}
Expand Down
1 change: 1 addition & 0 deletions Common/GPU/OpenGL/GLFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct GLExtensions {
bool ARB_depth_clamp;
bool ARB_uniform_buffer_object;
bool ARB_texture_non_power_of_two;
bool ARB_stencil_texturing;

// EXT
bool EXT_swap_control_tear;
Expand Down
16 changes: 15 additions & 1 deletion Common/GPU/OpenGL/GLQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
#elif !defined(GL_CLIP_DISTANCE0)
#define GL_CLIP_DISTANCE0 0x3000
#endif
// Texture parameter name passed to glTexParameteri to choose which aspect of a
// depth/stencil texture gets sampled. Only defined here when the GL headers in
// use have not already provided it.
#ifndef GL_DEPTH_STENCIL_TEXTURE_MODE
#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA
#endif
// Value for GL_DEPTH_STENCIL_TEXTURE_MODE that selects the stencil aspect.
// Likewise only defined when the headers lack it.
#ifndef GL_STENCIL_INDEX
#define GL_STENCIL_INDEX 0x1901
#endif

static constexpr int TEXCACHE_NAME_CACHE_SIZE = 16;

Expand Down Expand Up @@ -1114,8 +1120,16 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->z_stencil_texture.texture);
curTex[slot] = &c.bind_fb_texture.framebuffer->z_stencil_texture;
}
// This should be uncommon, so always set the mode.
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
} else if (c.bind_fb_texture.aspect == GL_STENCIL_BUFFER_BIT) {
if (curTex[slot] != &c.bind_fb_texture.framebuffer->z_stencil_texture) {
glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->z_stencil_texture.texture);
curTex[slot] = &c.bind_fb_texture.framebuffer->z_stencil_texture;
}
// This should be uncommon, so always set the mode.
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
} else {
// Can't texture from stencil buffers.
curTex[slot] = nullptr;
}
CHECK_GL_ERROR_IF_DEBUG();
Expand Down
14 changes: 12 additions & 2 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2578,6 +2578,10 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G
// No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway.
buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);
bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, "GetStencilbuffer");
if (!retval) {
// Try ReadbackStencilbufferSync, in case GLES.
retval = ReadbackStencilbufferSync(vfb->fbo, 0, 0, w, h, buffer.GetData(), w);
}
// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
return retval;
Expand Down Expand Up @@ -2652,6 +2656,10 @@ bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, i
return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, pixels, pixelsStride, "ReadbackDepthbufferSync");
}

// Default stencil readback: synchronously copies the stencil aspect of `fbo`
// for the rectangle (x, y, w, h) into `pixels` as 8-bit values (DataFormat::S8),
// using `pixelsStride` as the destination stride.
// Returns whatever the draw context reports for the copy.
// Backends that cannot read stencil directly are expected to override this.
bool FramebufferManagerCommon::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) {
	// Request the stencil aspect (not depth) so the channel matches the S8 output
	// format, consistent with the other stencil readback call sites.
	return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_STENCIL_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync");
}

void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) {
// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
if (x + w >= vfb->bufferWidth) {
Expand Down Expand Up @@ -2807,8 +2815,10 @@ void FramebufferManagerCommon::DeviceLost() {
DoRelease(reinterpretFromTo_[i][j]);
}
}
DoRelease(stencilUploadSampler_);
DoRelease(stencilUploadPipeline_);
DoRelease(stencilWriteSampler_);
DoRelease(stencilWritePipeline_);
DoRelease(stencilReadbackSampler_);
DoRelease(stencilReadbackPipeline_);
DoRelease(depthReadbackSampler_);
DoRelease(depthReadbackPipeline_);
DoRelease(draw2DPipelineColor_);
Expand Down
8 changes: 6 additions & 2 deletions GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ class FramebufferManagerCommon {
virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
// Used for when a shader is required, such as GLES.
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride);
virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride);
void SetViewport2D(int x, int y, int w, int h);
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
Expand Down Expand Up @@ -568,9 +569,12 @@ class FramebufferManagerCommon {

// Common implementation of stencil buffer upload. Also not 100% optimal, but not performance
// critical either.
Draw::Pipeline *stencilUploadPipeline_ = nullptr;
Draw::SamplerState *stencilUploadSampler_ = nullptr;
Draw::Pipeline *stencilWritePipeline_ = nullptr;
Draw::SamplerState *stencilWriteSampler_ = nullptr;

// Used on GLES where we can't directly readback depth or stencil, but here for simplicity.
Draw::Pipeline *stencilReadbackPipeline_ = nullptr;
Draw::SamplerState *stencilReadbackSampler_ = nullptr;
Draw::Pipeline *depthReadbackPipeline_ = nullptr;
Draw::SamplerState *depthReadbackSampler_ = nullptr;

Expand Down
12 changes: 6 additions & 6 deletions GPU/Common/StencilCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
shaderManager_->DirtyLastShader();
textureCache_->ForgetLastTexture();

if (!stencilUploadPipeline_) {
if (!stencilWritePipeline_) {
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();

char *fsCode = new char[8192];
Expand Down Expand Up @@ -237,8 +237,8 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
{ stencilUploadVs, stencilUploadFs },
inputLayout, stencilWrite, blendOff, rasterNoCull, &stencilUBDesc,
};
stencilUploadPipeline_ = draw_->CreateGraphicsPipeline(stencilWriteDesc, "stencil_upload");
_assert_(stencilUploadPipeline_);
stencilWritePipeline_ = draw_->CreateGraphicsPipeline(stencilWriteDesc, "stencil_upload");
_assert_(stencilWritePipeline_);

delete[] fsCode;
delete[] vsCode;
Expand All @@ -252,7 +252,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
stencilUploadVs->Release();

SamplerStateDesc descNearest{};
stencilUploadSampler_ = draw_->CreateSamplerState(descNearest);
stencilWriteSampler_ = draw_->CreateSamplerState(descNearest);
}

// Fullscreen triangle coordinates.
Expand Down Expand Up @@ -297,11 +297,11 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
}

draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilUploadSampler_);
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilWriteSampler_);

// We must bind the program after starting the render pass, and set the color mask after clearing.
draw_->SetScissorRect(0, 0, w, h);
draw_->BindPipeline(stencilUploadPipeline_);
draw_->BindPipeline(stencilWritePipeline_);

for (int i = 1; i < values; i += i) {
if (!(usedBits & i)) {
Expand Down
193 changes: 155 additions & 38 deletions GPU/GLES/DepthBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,94 @@ const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), {
{ "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 },
} };

// Fragment shader used to read stencil back through a color target.
// It samples the unsigned-integer sampler `tex` at v_texcoord, takes the red
// component as the stencil value, divides by 255.0 and writes the result to all
// four output channels. For __VERSION__ >= 130 the legacy names (varying,
// texture2D, gl_FragColor) are remapped via #define to their modern equivalents.
// NOTE(review): `lowp uniform usampler2D` puts the precision qualifier before the
// storage qualifier; older GLSL ES versions require the opposite order - confirm
// this is only compiled where the relaxed ordering is accepted.
static const char *stencil_dl_fs = R"(
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#endif
#if __VERSION__ >= 130
#define varying in
#define texture2D texture
#define gl_FragColor fragColor0
out vec4 fragColor0;
#endif
varying vec2 v_texcoord;
lowp uniform usampler2D tex;
void main() {
uint stencil = texture2D(tex, v_texcoord).r;
float scaled = float(stencil) / 255.0;
gl_FragColor = vec4(scaled, scaled, scaled, scaled);
}
)";

// Vertex shader paired with the stencil readback fragment shader.
// Takes a 2D a_position, outputs v_texcoord = a_position * 2.0, and places the
// vertex at clip-space (v_texcoord * 2.0 - 1.0, 0.0, 1.0). With inputs in the
// 0..1 range this yields texcoords in 0..2 and clip coordinates in -1..3.
// For __VERSION__ >= 130, attribute/varying are remapped to in/out.
static const char *stencil_vs = R"(
#ifdef GL_ES
precision highp float;
#endif
#if __VERSION__ >= 130
#define attribute in
#define varying out
#endif
attribute vec2 a_position;
varying vec2 v_texcoord;
void main() {
v_texcoord = a_position * 2.0;
gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);
}
)";

// Reports whether the current GL context can sample depth as a texture.
// Desktop GL: requires version 3.0 or newer.
// GLES: requires OES_packed_depth_stencil plus either OES_depth_texture or GLES3.
static bool SupportsDepthTexturing() {
	if (!gl_extensions.IsGLES) {
		return gl_extensions.VersionGEThan(3, 0);
	}

	const bool canSampleDepth = gl_extensions.OES_depth_texture || gl_extensions.GLES3;
	return gl_extensions.OES_packed_depth_stencil && canSampleDepth;
}

// Builds a graphics pipeline for a readback draw from the given shader sources.
//   draw         - draw context used to create every object.
//   tag          - debug tag given to the pipeline.
//   ubDesc       - uniform buffer description attached to the pipeline.
//   fs / fsTag   - fragment shader source and its debug tag.
//   vs / vsTag   - vertex shader source and its debug tag.
// The vertex input is a single 8-byte-stride stream carrying one R32G32_FLOAT
// position attribute. Blend, depth/stencil and raster states are created from
// the initializers below. All intermediate objects are released before
// returning; the caller receives the pipeline pointer.
static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag) {
	using namespace Draw;

	const ShaderLanguageDesc &langDesc = draw->GetShaderLanguageDesc();

	// Compile both stages from the supplied source strings.
	ShaderModule *fragModule = draw->CreateShaderModule(
		ShaderStage::Fragment, langDesc.shaderLanguage,
		reinterpret_cast<const uint8_t *>(fs), strlen(fs), fsTag);
	ShaderModule *vertModule = draw->CreateShaderModule(
		ShaderStage::Vertex, langDesc.shaderLanguage,
		reinterpret_cast<const uint8_t *>(vs), strlen(vs), vsTag);
	_assert_(fragModule && vertModule);

	// One binding (stride 8, per-vertex) with a single two-float position attribute.
	InputLayoutDesc layoutDesc = {
		{
			{ 8, false },
		},
		{
			{ 0, SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
		},
	};
	InputLayout *layout = draw->CreateInputLayout(layoutDesc);

	BlendState *noBlend = draw->CreateBlendState({ false, 0xF });
	DepthStencilState *noDepthStencil = draw->CreateDepthStencilState({});
	RasterState *noCull = draw->CreateRasterState({});

	PipelineDesc pipelineDesc{
		Primitive::TRIANGLE_LIST,
		{ vertModule, fragModule },
		layout, noDepthStencil, noBlend, noCull, ubDesc,
	};
	Draw::Pipeline *result = draw->CreateGraphicsPipeline(pipelineDesc, tag);
	_assert_(result);

	// Drop our references to the state objects and shader modules.
	noCull->Release();
	noBlend->Release();
	noDepthStencil->Release();
	layout->Release();

	fragModule->Release();
	vertModule->Release();

	return result;
}

bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) {
using namespace Draw;

Expand All @@ -117,44 +198,8 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int

if (useColorPath) {
if (!depthReadbackPipeline_) {
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();

ShaderModule *depthReadbackFs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)depth_dl_fs, strlen(depth_dl_fs), "depth_dl_fs");
ShaderModule *depthReadbackVs = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)depth_vs, strlen(depth_vs), "depth_vs");
_assert_(depthReadbackFs && depthReadbackVs);

InputLayoutDesc desc = {
{
{ 8, false },
},
{
{ 0, SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
},
};
InputLayout *inputLayout = draw_->CreateInputLayout(desc);

BlendState *blendOff = draw_->CreateBlendState({ false, 0xF });
DepthStencilState *stencilIgnore = draw_->CreateDepthStencilState({});
RasterState *rasterNoCull = draw_->CreateRasterState({});

PipelineDesc depthReadbackDesc{
Primitive::TRIANGLE_LIST,
{ depthReadbackVs, depthReadbackFs },
inputLayout, stencilIgnore, blendOff, rasterNoCull, &depthUBDesc,
};
depthReadbackPipeline_ = draw_->CreateGraphicsPipeline(depthReadbackDesc, "depth_dl");
_assert_(depthReadbackPipeline_);

rasterNoCull->Release();
blendOff->Release();
stencilIgnore->Release();
inputLayout->Release();

depthReadbackFs->Release();
depthReadbackVs->Release();

SamplerStateDesc descNearest{};
depthReadbackSampler_ = draw_->CreateSamplerState(descNearest);
depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_vs, "depth_vs");
depthReadbackSampler_ = draw_->CreateSamplerState({});
}

shaderManager_->DirtyLastShader();
Expand Down Expand Up @@ -242,3 +287,75 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
return true;
}

// Well, this is not depth, but it's depth/stencil related.
// Synchronously reads the stencil values of `fbo` for the rectangle (x, y, w, h)
// into `pixels`, one byte per pixel, advancing `pixelsStride` bytes per row.
//
// On non-GLES contexts the stencil aspect is copied directly. On GLES the
// stencil is sampled as a texture, drawn into a temporary color framebuffer,
// read back as RGBA8, and the low byte of each pixel is extracted.
//
// Returns false if `fbo` is null, if stencil texturing is unavailable, if no
// temporary framebuffer could be obtained, or if the color readback fails.
bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) {
	using namespace Draw;

	if (!fbo) {
		ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "ReadbackStencilbufferSync: bad fbo");
		return false;
	}

	const bool useColorPath = gl_extensions.IsGLES;
	if (!useColorPath) {
		return draw_->CopyFramebufferToMemorySync(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync");
	}

	// Unsupported below GLES 3.1 or without ARB_stencil_texturing.
	// OES_texture_stencil8 is related, but used to specify texture data.
	if ((gl_extensions.IsGLES && !gl_extensions.VersionGEThan(3, 1)) && !gl_extensions.ARB_stencil_texturing)
		return false;

	// Pixel size always 4 here because we always request RGBA back.
	const u32 bufSize = w * h * 4;
	if (!convBuf_ || convBufSize_ < bufSize) {
		delete[] convBuf_;
		convBuf_ = new u8[bufSize];
		convBufSize_ = bufSize;
	}

	if (!stencilReadbackPipeline_) {
		stencilReadbackPipeline_ = CreateReadbackPipeline(draw_, "stencil_dl", &depthUBDesc, stencil_dl_fs, "stencil_dl_fs", stencil_vs, "stencil_vs");
		stencilReadbackSampler_ = draw_->CreateSamplerState({});
	}

	auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height());
	if (!blitFBO) {
		// Nothing to render into; bail out before touching any render state.
		return false;
	}

	shaderManager_->DirtyLastShader();
	draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync");
	Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f };
	draw_->SetViewports(1, &viewport);

	draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT, 0);
	draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilReadbackSampler_);

	// We must bind the program after starting the render pass.
	// Scissor to the exact rectangle that is read back below. The target contents
	// are DONT_CARE, so any pixel outside the scissor would be undefined.
	draw_->SetScissorRect(x, y, w, h);
	draw_->BindPipeline(stencilReadbackPipeline_);

	// Fullscreen triangle coordinates.
	static const float positions[6] = {
		0.0, 0.0,
		1.0, 0.0,
		0.0, 1.0,
	};
	draw_->DrawUP(positions, 3);

	const bool copied = draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackStencilbufferSync");

	// We changed bindings and render state above regardless of whether the copy
	// succeeded, so always invalidate cached state before returning.
	textureCache_->ForgetLastTexture();
	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);

	if (!copied) {
		// Don't convert stale convBuf_ contents and report them as stencil data.
		return false;
	}

	// TODO: Use 1/4 width to write all values directly and skip CPU conversion?
	// The shader wrote the stencil value to every channel, so the low byte of each
	// RGBA pixel is the stencil value.
	uint8_t *dest = pixels;
	const u32_le *packed32 = (u32_le *)convBuf_;
	for (int yp = 0; yp < h; ++yp) {
		for (int xp = 0; xp < w; ++xp) {
			dest[xp] = packed32[xp] & 0xFF;
		}
		dest += pixelsStride;
		packed32 += w;
	}

	return true;
}
1 change: 1 addition & 0 deletions GPU/GLES/FramebufferManagerGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class FramebufferManagerGLES : public FramebufferManagerCommon {
protected:
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;
bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) override;
bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) override;

private:
u8 *convBuf_ = nullptr;
Expand Down