Skip to content
This repository has been archived by the owner on Nov 19, 2020. It is now read-only.

Commit

Permalink
Frustum and backface culling mesh shading
Browse files Browse the repository at this point in the history
  • Loading branch information
VZout committed Dec 11, 2019
1 parent cff12c7 commit 535ce16
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 5 deletions.
14 changes: 14 additions & 0 deletions src/meshlet_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,20 @@ struct MeshletDesc
m_y |= Pack(bboxMax[0], 8, 0) | Pack(bboxMax[1], 8, 8) | Pack(bboxMax[2], 8, 16);
}

// Stores the meshlet's culling cone in the upper bits of m_z and m_w.
//   coneOctX / coneOctY : the two signed 8-bit components of the cone axis,
//                         packed with width 8 at bit offset 20 of m_z / m_w.
//   minusSinAngle       : signed 8-bit cone angle term; its low nibble goes
//                         to m_z and its high nibble to m_w, both at offset 28.
// Bits are OR-ed in, so previously set bits in these fields are not cleared.
// NOTE(review): Pack's (value, width, offset) argument meaning is inferred
// from the call sites here - confirm against its definition.
void SetCone(int8_t coneOctX, int8_t coneOctY, int8_t minusSinAngle)
{
    // Work on the raw 8-bit pattern so the nibble split is independent of sign.
    const uint8_t rawAngle = minusSinAngle;
    const int lowNibble = (rawAngle >> 0) & 0xF;
    const int highNibble = (rawAngle >> 4) & 0xF;

    m_z |= Pack(coneOctX, 8, 20) | Pack(lowNibble, 4, 28);
    m_w |= Pack(coneOctY, 8, 20) | Pack(highNibble, 4, 28);
}

// Reads back the cone fields written by SetCone.
//   coneOctX / coneOctY : 8-bit fields at offset 20 of m_z / m_w.
//   minusSinAngle       : reassembled from the 4-bit field at offset 28 of
//                         m_z (low nibble) and of m_w (high nibble).
// The unpacked values are narrowed to int8_t on assignment.
void GetCone(int8_t& coneOctX, int8_t& coneOctY, int8_t& minusSinAngle) const
{
    const auto angleLow = Unpack(m_z, 4, 28);
    const auto angleHigh = Unpack(m_w, 4, 28);

    coneOctX = Unpack(m_z, 8, 20);
    coneOctY = Unpack(m_w, 8, 20);
    minusSinAngle = angleLow | (angleHigh << 4);
}

void GetBBox(uint8_t bboxMin[3], uint8_t bboxMax[3]) const
{
bboxMin[0] = Unpack(m_x, 8, 0);
Expand Down
122 changes: 122 additions & 0 deletions src/model_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,65 @@ inline glm::vec4 GetBoxCorner(glm::vec3 bboxMin, glm::vec3 bboxMax, int n)
}
}

// all oct functions derived from "A Survey of Efficient Representations for Independent Unit Vectors"
// http://jcgt.org/published/0003/02/01/paper.pdf
// Per-component sign of v.x and v.y that never yields zero: +1 when the
// component is >= 0, otherwise -1. The returned z is always 1 so that a
// component-wise multiply with the result leaves z untouched.
inline glm::vec3 OctSignNotZero(glm::vec3 v)
{
    float sign_x = -1.0f;
    float sign_y = -1.0f;
    if (v.x >= 0.0f)
    {
        sign_x = +1.0f;
    }
    if (v.y >= 0.0f)
    {
        sign_y = +1.0f;
    }
    return glm::vec3(sign_x, sign_y, 1.0f);
}

// Decodes an octahedral encoding back to a unit vector.
//   e : encoded point; only e.x and e.y are read.
// Returns the normalized direction.
inline glm::vec3 OctToFVec3(glm::vec3 e)
{
    // Height above the xy plane on the octahedron |x| + |y| + |z| = 1.
    const float height = 1.0f - fabsf(e.x) - fabsf(e.y);

    glm::vec3 direction(e.x, e.y, height);
    if (height < 0.0f)
    {
        // Lower hemisphere: undo the fold over the diagonals. z passes through
        // unchanged because OctSignNotZero returns 1 for z.
        const glm::vec3 unfolded(1.0f - fabsf(e.y), 1.0f - fabsf(e.x), height);
        direction = unfolded * OctSignNotZero(direction);
    }
    return glm::normalize(direction);
}

// Encodes a direction as a point in the octahedral square.
//   v : direction to encode; it does not need to be normalized because the
//       projection divides by the L1 norm.
// Returns the encoding in x and y; z of the result is always 0.
// A zero-length v has no direction and is mapped to the centre of the square
// (0, 0), which OctToFVec3 decodes to +Z.
inline glm::vec3 FVec3ToOct(glm::vec3 v)
{
    const float l1_norm = fabsf(v.x) + fabsf(v.y) + fabsf(v.z);

    // Without this guard a zero vector computes 0 * (1 / 0) = NaN in both
    // components, and callers that quantize the result to integers would then
    // convert NaN. The negated comparison also rejects a NaN norm.
    if (!(l1_norm > 0.0f))
    {
        return glm::vec3(0.0f);
    }

    // Project the sphere onto the octahedron, and then onto the xy plane
    glm::vec3 p = glm::vec3(v.x, v.y, 0) * (1.0f / l1_norm);
    // Reflect the folds of the lower hemisphere over the diagonals
    return (v.z <= 0.0f) ? glm::vec3(1.0f - fabsf(p.y), 1.0f - fabsf(p.x), 0.0f) * OctSignNotZero(p) : p;
}

// Octahedral encoding of v snapped to snorm(n/2) steps, choosing the rounding
// of each component that best preserves the direction.
//   v : direction to encode.
//   n : total bit budget for both components (e.g. 16 for two snorm8 values).
// Returns the encoded point as floats that are multiples of 1/M in x and y.
inline glm::vec3 FVec3ToOctnPrecise(glm::vec3 v, const int n)
{
    // Largest integer magnitude of one snorm(n/2) component, e.g. 127.0 for snorm8.
    float M = float(1 << ((n / 2) - 1)) - 1.0;

    // Baseline candidate: clamp to the square and round both components down.
    const glm::vec3 in_square = glm::clamp(FVec3ToOct(v), glm::vec3(-1.0f), glm::vec3(1.0f));
    const glm::vec3 floored = glm::floor(in_square * M) * glm::vec3(1.0 / M);

    glm::vec3 winner = floored;
    float winner_cosine = glm::dot(OctToFVec3(floored), v);

    // Remaining floor/ceil combinations as (x, y) step counts of 1/M, in the
    // order (floor, ceil), (ceil, floor), (ceil, ceil). Stepping up at +/-1
    // leaves the square, but that is a worse encoding and never wins.
    const int steps[3][2] = { { 0, 1 }, { 1, 0 }, { 1, 1 } };
    for (const auto& step : steps)
    {
        const glm::vec3 trial = glm::vec3(step[0], step[1], 0) * (1 / M) + floored;
        const float trial_cosine = glm::dot(OctToFVec3(trial), v);
        // Strictly greater: on a tie the earlier candidate is kept.
        if (trial_cosine > winner_cosine)
        {
            winner = trial;
            winner_cosine = trial_cosine;
        }
    }
    return winner;
}

template<typename T>
void ModelPool::RegisterLoader()
{
Expand Down Expand Up @@ -281,6 +340,9 @@ ModelHandle ModelPool::LoadWithMaterials(ModelData* data,
if constexpr (HasBitangent<V_T>::value) { vertices[i].m_bitangent = mesh.m_bitangents[i]; }
}

glm::vec3 average_normal(0);
std::vector<glm::vec3> tri_normals;

// Calculate objectbbox
glm::vec3 object_bbox_min = glm::vec3(std::numeric_limits<float>::max());
glm::vec3 object_bbox_max = glm::vec3(-std::numeric_limits<float>::max());
Expand All @@ -299,6 +361,25 @@ ModelHandle ModelPool::LoadWithMaterials(ModelData* data,
object_bbox_max = glm::max(object_bbox_max, vertices[triangle[1]].m_pos);
object_bbox_max = glm::max(object_bbox_max, vertices[triangle[2]].m_pos);
}

// cone
{
glm::vec3 cross = glm::cross(vertices[triangle[1]].m_pos - vertices[triangle[0]].m_pos, vertices[triangle[2]].m_pos - vertices[triangle[0]].m_pos);
float length = glm::length(cross);

glm::vec3 normal;
if (length > FLT_EPSILON)
{
normal = cross * (1.0f / length);
}
else
{
normal = cross;
}

average_normal += normal;
tri_normals.push_back(normal);
}
}

// Generate meshlets
Expand Down Expand Up @@ -438,6 +519,47 @@ ModelHandle ModelPool::LoadWithMaterials(ModelData* data,

meshlet.SetBBox(grid_min, grid_max);

// potential improvement, instead of average maybe use
// http://www.cs.technion.ac.il/~cggc/files/gallery-pdfs/Barequet-1.pdf
float len = glm::length(average_normal);
if (len > FLT_EPSILON)
{
average_normal = average_normal / len;
}
else
{
average_normal = glm::vec3(0.0f);
}

glm::vec3 packed = FVec3ToOctnPrecise(average_normal, 16);
std::int8_t cone_x = std::min(127, std::max(-127, std::int32_t(packed.x * 127.0f)));
std::int8_t cone_y = std::min(127, std::max(-127, std::int32_t(packed.y * 127.0f)));

// post quantization normal
average_normal = OctToFVec3(glm::vec3(float(cone_x) / 127.0f, float(cone_y) / 127.0f, 0.0f));

float mindot = 1.0f;
for (auto const & n : tri_normals)
{
mindot = std::min(mindot, glm::dot(n, average_normal));
}

// apply safety delta due to quantization
mindot -= 1.0f / 127.0f;
mindot = std::max(-1.0f, mindot);

// positive value for cluster not being backface cullable (normals > 90)
std::int8_t cone_angle = 127;
if (mindot > 0)
{
// otherwise store -sin(cone angle)
// we test against dot product (cosine) so this is equivalent to cos(cone angle + 90°)
float angle = -sinf(acosf(mindot));
cone_angle = std::max(-127, std::min(127, int32_t(angle * 127.0f)));
}

meshlet.SetCone(cone_x, cone_y, cone_angle);

meshlet_data.push_back(meshlet);
}

Expand Down
4 changes: 2 additions & 2 deletions src/shaders/instancing_task.comp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ taskNV out Task
uint num_meshlets;
} OUT;


void main()
{
const uint base_id = gl_WorkGroupID.x * GROUP_SIZE;
Expand All @@ -49,7 +48,8 @@ void main()
mat4 model = ubo.model[instance_id];
uvec4 meshlet_desc = mb.meshlet_descs[meshlet_id];

bool render = !(global_id > total_meshlet_count || EarlyCull(meshlet_desc, model, camera.proj * camera.view));
mat4 inverse_view = inverse(camera.view);
bool render = !(global_id > total_meshlet_count || EarlyCull(meshlet_desc, model, vec3(inverse_view[3]), camera.proj * camera.view));
uvec4 vote = subgroupBallot(render);
uint tasks = subgroupBallotBitCount(vote);

Expand Down
44 changes: 42 additions & 2 deletions src/shaders/mesh_shader_util.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,60 @@ uint GetCullBits(vec4 hPos)
return cullBits;
}

bool EarlyCull(uvec4 meshlet_desc, mat4 model, mat4 viewproj_mat)
// oct_ code from "A Survey of Efficient Representations for Independent Unit Vectors"
// http://jcgt.org/published/0003/02/01/paper.pdf
// Per-component sign that never returns zero: +1.0 where the component
// is >= 0.0, otherwise -1.0.
vec2 oct_signNotZero(vec2 v) {
    vec2 signs = vec2(-1.0);
    if (v.x >= 0.0) signs.x = +1.0;
    if (v.y >= 0.0) signs.y = +1.0;
    return signs;
}

// Decodes a 2D octahedral encoding into a normalized 3D direction.
vec3 oct_to_vec3(vec2 e) {
    // Height on the octahedron |x| + |y| + |z| = 1.
    float height = 1.0 - abs(e.x) - abs(e.y);

    vec2 planar = e;
    if (height < 0) {
        // Lower hemisphere: undo the fold over the diagonals.
        planar = (1.0 - abs(e.yx)) * oct_signNotZero(e);
    }

    return normalize(vec3(planar, height));
}

// Extracts the meshlet cone from the packed descriptor.
//   meshlet_desc : packed descriptor; bits 20..27 of .z / .w hold the two
//                  octahedral axis components, bits 28..31 of .z / .w hold the
//                  low / high nibble of the angle byte.
//   normal       : receives the decoded, normalized cone axis.
//   angle        : receives the angle byte decoded as a signed-normalized value.
void DecodeNormalAngle(uvec4 meshlet_desc, out vec3 normal, out float angle)
{
    uint oct_x = (meshlet_desc.z >> 20) & 0xFF;
    uint oct_y = (meshlet_desc.w >> 20) & 0xFF;
    uint angle_low = meshlet_desc.z >> 28;
    uint angle_high = meshlet_desc.w >> 28;

    // Lay the three bytes out as x, y, z of a snorm4x8 word; the top byte stays 0.
    uint snorm_word = (oct_x << 0) | (oct_y << 8) | (angle_low << 16) | (angle_high << 20);
    vec3 decoded = unpackSnorm4x8(snorm_word).xyz;

    normal = oct_to_vec3(decoded.xy);
    angle = decoded.z;
}

// Decides whether a whole meshlet can be skipped before any mesh work is launched.
//   meshlet_desc : packed meshlet descriptor (bounding box and cone).
//   model        : model matrix applied to the box corners and the cone axis.
//   view_pos     : camera position, used in the same space as the transformed corners.
//   viewproj_mat : matrix applied to the transformed corners for the frustum test.
// Returns true when the meshlet should be culled: either all eight box corners
// share at least one cull bit, or the cone test reports the meshlet as
// back-facing from every corner.
bool EarlyCull(uvec4 meshlet_desc, mat4 model, vec3 view_pos, mat4 viewproj_mat)
{
    vec3 bbox_min;
    vec3 bbox_max;
    DecodeBbox(meshlet_desc, bbox_min, bbox_max);

    // Start with every bit set; a bit survives only if all corners report it.
    uint frustum_bits = ~0;
    bool backface = false;

    // Early backface culling
    vec3 o_group_normal;
    float angle;
    DecodeNormalAngle(meshlet_desc, o_group_normal, angle);
    // Inverse-transpose of the upper 3x3 keeps the axis a valid normal direction
    // when the model matrix scales non-uniformly.
    vec3 w_group_normal = normalize(inverse(transpose(mat3(model))) * o_group_normal);
    // A non-negative angle marks the meshlet as not backface-cullable, so the
    // test below can never turn the flag on for it.
    backface = angle < 0;

    for (int n = 0; n < 8; n++)
    {
        // Early frustum culling
        vec4 w_pos = model * GetBoxCorner(bbox_min, bbox_max, n);
        vec4 h_pos = viewproj_mat * w_pos;
        frustum_bits &= GetCullBits(h_pos);

        // Early backface culling: the cone must face away as seen from every corner.
        vec3 w_dir = normalize(view_pos - w_pos.xyz);
        backface = backface && (dot(w_group_normal, w_dir) < angle);
    }

    // Single exit point. An earlier `return frustum_bits != 0;` placed ahead of
    // this line made the backface result unreachable.
    return (frustum_bits != 0 || backface);
}
2 changes: 1 addition & 1 deletion tests/demo/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ class Demo : public Application

bool m_viewport_has_changed = false;

fg_manager::FGType m_fg_type = fg_manager::FGType::RAYTRACING;
fg_manager::FGType m_fg_type = fg_manager::FGType::PBR_MESH_SHADING;
};


Expand Down

0 comments on commit 535ce16

Please sign in to comment.