Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging dev into main #74

Merged
merged 35 commits into from
Dec 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e91fbe3
Version bump before sync to main.
jbikker Dec 18, 2024
115f2a3
Update README.md
jbikker Dec 18, 2024
2aaff3e
Merge branch 'dev' of https://github.com/jbikker/tinybvh into dev
jbikker Dec 18, 2024
e89bf4c
Minimalist wavefront code.
jbikker Dec 18, 2024
2a49133
Error in windows window.
jbikker Dec 18, 2024
dd8dc99
Disabling BuildNEON for now.
jbikker Dec 19, 2024
299bc27
Functional basic gpu wavefront path tracer.
jbikker Dec 19, 2024
f8098a2
Next Event Estimation for wavefront pt.
jbikker Dec 19, 2024
e554bbf
New screenshot.
jbikker Dec 19, 2024
e711d22
Update README.md
jbikker Dec 19, 2024
2affe64
Fix for second bounce.
jbikker Dec 20, 2024
c8c6093
NEE now accounts for BRDF.
jbikker Dec 20, 2024
f204b7f
Specular support in wavefront pt.
jbikker Dec 20, 2024
2372756
native_recip and fast_normalize
benanil Dec 20, 2024
9bca1e9
Merge pull request #71 from benanil/dev
jbikker Dec 20, 2024
71e50d4
Blue noise to play with.
jbikker Dec 20, 2024
c86ac2a
Blue noise sampling for NEE & bounce.
jbikker Dec 20, 2024
a848a16
Update README.md
jbikker Dec 21, 2024
392d450
Optimized cwbvh for ~10% faster traversal.
jbikker Dec 21, 2024
8feb5c2
Merge branch 'dev' of https://github.com/jbikker/tinybvh into dev
jbikker Dec 21, 2024
d73d549
Merge branch 'dev' of https://github.com/jbikker/tinybvh into dev
jbikker Dec 21, 2024
4e51514
Save/load for cwbvh, proper path state in wavefront.
jbikker Dec 22, 2024
8846bbe
Merged.
jbikker Dec 22, 2024
d04ae10
MIS for wavefront path tracer.
jbikker Dec 22, 2024
91d1a1a
Better documented code.
jbikker Dec 22, 2024
ed3451c
Minor comments.
jbikker Dec 22, 2024
cc624f9
Fixed MIS.
jbikker Dec 23, 2024
473fbc4
Update README.md
jbikker Dec 23, 2024
b1db88a
Update README.md
jbikker Dec 23, 2024
e22f885
Update README.md
jbikker Dec 23, 2024
36caa36
Update README.md
jbikker Dec 23, 2024
38e50d5
More MIS fixes.
jbikker Dec 23, 2024
a659545
Merge branch 'dev' of https://github.com/jbikker/tinybvh into dev
jbikker Dec 23, 2024
87b9189
tinby_bvh_gpu now compiles on Linux; CodeMaid applied.
jbikker Dec 26, 2024
437ce4f
Update README.md
jbikker Dec 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Better documented code.
  • Loading branch information
jbikker committed Dec 22, 2024
commit 91d1a1a2644d21d512070f0601de369cda3e4cd4
39 changes: 39 additions & 0 deletions tools.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Common constants
#define PI 3.14159265358979323846264f
#define INVPI 0.31830988618379067153777f
#define INV2PI 0.15915494309189533576888f
#define TWOPI 6.28318530717958647692528f
#define EPSILON 0.0001f // for a 100^3 world

// Xor32 RNG
uint WangHash( uint s ) { s = (s ^ 61) ^ (s >> 16), s *= 9, s = s ^ (s >> 4), s *= 0x27d4eb2d; return s ^ (s >> 15); }
uint RandomUInt( uint* seed ) { *seed ^= *seed << 13, * seed ^= *seed >> 17, * seed ^= *seed << 5; return *seed; }
float RandomFloat( uint* seed ) { return RandomUInt( seed ) * 2.3283064365387e-10f; }

// Color conversion
float3 rgb32_to_vec3( uint c )
{
return (float3)((float)((c >> 16) & 255), (float)((c >> 8) & 255), (float)(c & 255)) * 0.00392f;
}

// Specular reflection
float3 Reflect( const float3 D, const float3 N ) { return D - 2.0f * N * dot( N, D ); }

// DiffuseReflection: Uniform random bounce in the hemisphere
float3 DiffuseReflection( float3 N, uint* seed )
{
float3 R;
do
{
R = (float3)(RandomFloat( seed ) * 2 - 1, RandomFloat( seed ) * 2 - 1, RandomFloat( seed ) * 2 - 1);
} while (dot( R, R ) > 1);
return fast_normalize( dot( R, N ) > 0 ? R : -R );
}

// CosWeightedDiffReflection: Cosine-weighted random bounce in the hemisphere
float3 CosWeightedDiffReflection( const float3 N, const float r0, const float r1 )
{
const float r = sqrt( 1 - r1 * r1 ), phi = 4 * PI * r0;
const float3 R = (float3)( cos( phi ) * r, sin( phi ) * r, r1);
return fast_normalize( N + R );
}
1 change: 1 addition & 0 deletions vcproj/tiny_bvh_gpu.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<ClInclude Include="..\tiny_ocl.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\tools.cl" />
<None Include="..\traverse.cl" />
<None Include="..\wavefront.cl" />
</ItemGroup>
Expand Down
2 changes: 2 additions & 0 deletions vcproj/tiny_bvh_gpu.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\tiny_bvh.h" />
<ClInclude Include="..\tiny_ocl.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\traverse.cl" />
<None Include="..\wavefront.cl" />
<None Include="..\tools.cl" />
</ItemGroup>
</Project>
170 changes: 65 additions & 105 deletions wavefront.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

#include "traverse.cl"
#include "tools.cl"

#define PI 3.14159265358979323846264f
#define INVPI 0.31830988618379067153777f
#define INV2PI 0.15915494309189533576888f
#define TWOPI 6.28318530717958647692528f
#define EPSILON 0.0001f // for a 100^3 world
#define PATH_LAST_SPECULAR 1 // previous vertex was camera or mirror
#define PATH_VIA_DIFFUSE 2 // path has at least one diffuse vertex

// struct for rendering parameters - keep in sync with CPU side.
#define MATERIAL_LIGHT 1 // material emits light - end of path
#define MATERIAL_SPECULAR 2 // material is pure specular

// struct for rendering parameters
struct RenderData
{
// camera setup
Expand All @@ -21,15 +22,11 @@ struct RenderData
global float4* cwbvhTris;
global uint* blueNoise;
};
const float3 lightColor = (float3)(20,20,18);

__global volatile int extendTasks, shadeTasks, connectTasks;
__global struct RenderData rd;

// Xor32 RNG
uint WangHash( uint s ) { s = (s ^ 61) ^ (s >> 16), s *= 9, s = s ^ (s >> 4), s *= 0x27d4eb2d; return s ^ (s >> 15); }
uint RandomUInt( uint* seed ) { *seed ^= *seed << 13, * seed ^= *seed >> 17, * seed ^= *seed << 5; return *seed; }
float RandomFloat( uint* seed ) { return RandomUInt( seed ) * 2.3283064365387e-10f; }

// Blue noise interface for fixed 128x128x8 dataset.
float2 Noise( const uint x, const uint y, const uint page /* 0..7 */ )
{
Expand All @@ -39,37 +36,7 @@ float2 Noise( const uint x, const uint y, const uint page /* 0..7 */ )
return (float2)( (float)r * 0.00392f, (float)g * 0.00392f );
}

// Color conversion
float3 rgb32_to_vec3( uint c )
{
return (float3)((float)((c >> 16) & 255), (float)((c >> 8) & 255), (float)(c & 255)) * 0.00392f;
}

// Specular reflection
float3 Reflect( const float3 D, const float3 N ) { return D - 2.0f * N * dot( N, D ); }

// DiffuseReflection: Uniform random bounce in the hemisphere
float3 DiffuseReflection( float3 N, uint* seed )
{
float3 R;
do
{
R = (float3)(RandomFloat( seed ) * 2 - 1, RandomFloat( seed ) * 2 - 1, RandomFloat( seed ) * 2 - 1);
} while (dot( R, R ) > 1);
return fast_normalize( dot( R, N ) > 0 ? R : -R );
}

// CosWeightedDiffReflection: Cosine-weighted random bounce in the hemisphere
float3 CosWeightedDiffReflection( const float3 N, const float r0, const float r1 )
{
const float r = sqrt( 1 - r1 * r1 ), phi = 4 * PI * r0;
const float3 R = (float3)( cos( phi ) * r, sin( phi ) * r, r1);
return fast_normalize( N + R );
}

// PathState: path throughput, current extension ray, pixel index
#define PATH_LAST_SPECULAR 1
#define PATH_VIA_DIFFUSE 2
struct PathState
{
float4 T; // xyz = rgb, postponed MIS pdf in w
Expand All @@ -90,24 +57,20 @@ struct Potential
void kernel SetRenderData( int primaryRayCount,
float4 eye, float4 p0, float4 p1, float4 p2,
uint frameIdx, uint width, uint height,
global float4* cwbvhNodes, global float4* cwbvhTris,
global uint* blueNoise
global float4* cwbvhNodes, global float4* cwbvhTris, global uint* blueNoise
)
{
if (get_global_id( 0 ) != 0) return;
// set camera parameters
rd.eye = eye;
rd.p0 = p0, rd.p1 = p1, rd.p2 = p2;
rd.eye = eye, rd.p0 = p0, rd.p1 = p1, rd.p2 = p2;
rd.frameIdx = frameIdx;
rd.width = width;
rd.height = height;
rd.width = width, rd.height = height;
// set BVH pointers
rd.cwbvhNodes = cwbvhNodes;
rd.cwbvhTris = cwbvhTris;
rd.blueNoise = blueNoise;
// initialize atomic counters
extendTasks = primaryRayCount;
shadeTasks = primaryRayCount;
extendTasks = shadeTasks = primaryRayCount;
connectTasks = 0;
}

Expand Down Expand Up @@ -136,12 +99,15 @@ void kernel Generate( global struct PathState* raysOut, uint frameSeed )
// extend: trace the generated rays to find the nearest intersection point.
void kernel Extend( global struct PathState* raysIn )
{
// we use a worker thread system here, where a fixed number of threads 'fight for food'
// by decreasing an atomic counter. This way, the counter can stay on the GPU, saving
// expensive transfers: The host doesn't need to know the exact amount of tasks.
while (1)
{
// obtain task
if (extendTasks < 1) break;
const int pathId = atomic_dec( &extendTasks ) - 1;
if (pathId < 0) break;
if (pathId < 0) break; // someone else could have decreased it before us.
const float4 O4 = raysIn[pathId].O;
const float4 D4 = raysIn[pathId].D;
const float3 rD = native_recip( D4.xyz );
Expand All @@ -150,18 +116,13 @@ void kernel Extend( global struct PathState* raysIn )
}

// syncing counters: at this point, we need to reset the extendTasks and connectTasks counters.
void kernel UpdateCounters1()
{
if (get_global_id( 0 ) != 0) return;
extendTasks = 0;
}
void kernel UpdateCounters1() { if (get_global_id( 0 ) == 0) extendTasks = 0; }

// shade: process intersection results; this evaluates the BRDF and creates
// extension rays and shadow rays.
void kernel Shade( global float4* accumulator,
global struct PathState* raysIn, global struct PathState* raysOut,
global struct Potential* shadowOut,
global float4* verts, uint sampleIdx
global struct Potential* shadowOut, global float4* verts, uint sampleIdx
)
{
while (1)
Expand Down Expand Up @@ -192,47 +153,32 @@ void kernel Shade( global float4* accumulator,
// fetch geometry at intersection point
uint vertIdx = as_uint( hit.w ) * 3;
float4 v0 = verts[vertIdx];
uint mat = as_uint( v0.w ) >> 24;
// end path on light
float3 lightColor = (float3)(20);
uint materialType = as_uint( v0.w ) >> 24;
float hemiPDF = T4.w;
float3 D = D4.xyz;
if (mat == 1 /* light source */)
// end path on light
if (materialType == MATERIAL_LIGHT)
{
float3 potential;
float MISweight;
if (pathState & PATH_LAST_SPECULAR)
{
potential = T * lightColor;
// we came via a mirror; there is no alternative technique.
MISweight = 1;
}
else
{
float lightDistance = D4.w;
float lightArea = 9 * 5;
float NLdotL = fabs( D.y ); // actually: fabs( dot( D, NL ) );
// two techniques could have taken us here; apply MIS.
float lightDistance = D4.w, lightArea = 9 * 5, NLdotL = fabs( D.y ); // actually: dot( D, NL ).
float solidAngle = min( TWOPI, lightArea * (1.0f / (lightDistance * lightDistance)) * NLdotL );
float lightPDF = 1 / solidAngle;
potential = T * (1 / (lightPDF + hemiPDF)) * lightColor;
MISweight = 1 / (lightPDF + hemiPDF);
}
accumulator[pixelIdx] += (float4)(potential, 1);
accumulator[pixelIdx] += (float4)(T * MISweight * lightColor, 1);
continue;
}
// apply postponed hemisphere PDF
T *= 1.0f / hemiPDF;
float3 vert0 = v0.xyz, vert1 = verts[vertIdx + 1].xyz, vert2 = verts[vertIdx + 2].xyz;
float3 materialColor = rgb32_to_vec3( as_uint( v0.w ) ); // material color
float3 N = fast_normalize( cross( vert1 - vert0, vert2 - vert0 ) );
if (dot( N, D ) > 0) N *= -1;
float3 I = O4.xyz + t * D;
// handle pure specular BRDF
if (mat == 2)
{
uint newRayIdx = atomic_inc( &extendTasks );
float3 R = Reflect( D, N );
raysOut[newRayIdx].T = (float4)(T * materialColor, 1);
raysOut[newRayIdx].O = (float4)(I + R * EPSILON, as_float( (pixelIdx << 8) + ((depth + 1) << 4) + PATH_LAST_SPECULAR ));
raysOut[newRayIdx].D = (float4)(R, 1e30f);
continue;
}
// direct illumination: next event estimation
// generate four random numbers
float r0, r1, r2, r3;
if (depth == 0 && sampleIdx < 4)
{
Expand All @@ -246,22 +192,41 @@ void kernel Shade( global float4* accumulator,
r0 = RandomFloat( &seed ), r1 = RandomFloat( &seed );
r2 = RandomFloat( &seed ), r3 = RandomFloat( &seed );
}
float3 P = (float3)(r0 * 9.0f - 4.5f, 30, r1 * 5.0f - 3.5f);
float3 L = P - I;
float NdotL = dot( N, L );
// prepare data for bounce
float3 vert0 = v0.xyz, vert1 = verts[vertIdx + 1].xyz, vert2 = verts[vertIdx + 2].xyz;
float3 I = O4.xyz + t * D;
float3 N = fast_normalize( cross( vert1 - vert0, vert2 - vert0 ) );
if (dot( N, D ) > 0) N *= -1;
float3 materialColor = rgb32_to_vec3( as_uint( v0.w ) );
float3 BRDF = materialColor * INVPI; // lambert BRDF: albedo / pi
if (NdotL > 0)
// direct illumination: next event estimation
if (materialType != MATERIAL_SPECULAR)
{
uint newShadowIdx = atomic_inc( &connectTasks );
float dist2 = dot( L, L ), dist = sqrt( dist2 );
L *= native_recip( dist );
float NLdotL = fabs( L.y ); // actually, fabs( dot( L, LN ) )
shadowOut[newShadowIdx].T = (float4)(lightColor * BRDF * T * NdotL * NLdotL * native_recip( dist2 ), 0);
shadowOut[newShadowIdx].O = (float4)(I + L * EPSILON, as_float( pixelIdx ));
shadowOut[newShadowIdx].D = (float4)(L, dist - 2 * EPSILON);
float3 P = (float3)(r0 * 9.0f - 4.5f, 30, r1 * 5.0f - 3.5f);
float3 L = P - I;
float NdotL = dot( N, L );
if (NdotL > 0)
{
uint newShadowIdx = atomic_inc( &connectTasks );
float dist2 = dot( L, L ), dist = sqrt( dist2 );
L *= native_recip( dist );
float NLdotL = fabs( L.y ); // actually, fabs( dot( L, LN ) )
shadowOut[newShadowIdx].T = (float4)(lightColor * BRDF * T * NdotL * NLdotL * native_recip( dist2 ), 0);
shadowOut[newShadowIdx].O = (float4)(I + L * EPSILON, as_float( pixelIdx ));
shadowOut[newShadowIdx].D = (float4)(L, dist - 2 * EPSILON);
}
}
// handle pure specular BRDF
if (depth >= 3) continue;
if (materialType == MATERIAL_SPECULAR)
{
uint newRayIdx = atomic_inc( &extendTasks );
float3 R = Reflect( D, N );
raysOut[newRayIdx].T = (float4)(T * materialColor, 1);
raysOut[newRayIdx].O = (float4)(I + R * EPSILON, as_float( (pixelIdx << 8) + ((depth + 1) << 4) + PATH_LAST_SPECULAR ));
raysOut[newRayIdx].D = (float4)(R, 1e30f);
}
// indirect illumination: diffuse bounce
if (depth < 3 && (pathState & PATH_VIA_DIFFUSE) == 0 )
else /* materialType == MATERIAL_DIFFUSE */ if ((pathState & PATH_VIA_DIFFUSE) == 0 )
{
uint newRayIdx = atomic_inc( &extendTasks );
float3 R = CosWeightedDiffReflection( N, r2, r3 );
Expand Down Expand Up @@ -291,15 +256,10 @@ void kernel Connect( global float4* accumulator, global struct Potential* shadow
if (connectTasks < 1) break;
const int rayId = atomic_dec( &connectTasks ) - 1;
if (rayId < 0) break;
const float4 T4 = shadowIn[rayId].T;
const float4 O4 = shadowIn[rayId].O;
const float4 D4 = shadowIn[rayId].D;
const float4 T4 = shadowIn[rayId].T, O4 = shadowIn[rayId].O, D4 = shadowIn[rayId].D;
const float3 rD = native_recip( D4.xyz );
if (!isoccluded_cwbvh( rd.cwbvhNodes, rd.cwbvhTris, O4.xyz, D4.xyz, rD, D4.w ))
{
uint pixelIdx = as_uint( O4.w );
accumulator[pixelIdx] += T4;
}
if (isoccluded_cwbvh( rd.cwbvhNodes, rd.cwbvhTris, O4.xyz, D4.xyz, rD, D4.w )) continue;
accumulator[as_uint( O4.w )] += T4;
}
}

Expand Down
Loading