In this project I built a GPGPU-based ray tracer using OpenGL 4.4 and compute shaders. The ray tracer consists of a three-stage pipeline.
Ray creation stage – Creates the rays from the camera's look, up and right vectors.
#version 430 core
// UNIFORMS
// Camera basis used to build the primary rays: each pixel's offsets are applied
// along camera_right and camera_up and added to camera_direction.
uniform vec3 camera_right;
uniform vec3 camera_up;
// Origin written for every primary ray.
uniform vec3 camera_position;
uniform vec3 camera_direction;
// When true, main() also resets this pixel of output_image to vec4(0.0).
uniform bool clear_image;
// Work-group size: 16x16 invocations; main() uses gl_GlobalInvocationID.xy as the pixel coordinate.
layout (local_size_x = 16, local_size_y = 16) in;
// IMG
// Per-pixel ray direction (xyz, w written as 0).
layout (rgba32f, binding = 3) uniform image2D ray_dirbuffer;
// Per-pixel ray origin (xyz, w written as 0).
layout (rgba32f, binding = 4) uniform image2D ray_posbuffer;
// Color image; only written here when clear_image is set.
layout (rgba32f, binding = 7) uniform image2D output_image;
// Ray creation stage entry point.
// Builds the primary ray for the pixel addressed by gl_GlobalInvocationID.xy
// and stores its direction and origin for the following stages. Optionally
// clears the pixel of the output image.
void main(void)
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    // The image size is derived from the dispatch size, not from the image itself.
    float width = gl_WorkGroupSize.x*gl_NumWorkGroups.x; // pixels across
    float height = gl_WorkGroupSize.y*gl_NumWorkGroups.y; // pixels high

    // Both axes are scaled by the height so the horizontal range grows with the
    // aspect ratio: j spans [-0.5, 0.5), i spans [-aspect/2, aspect/2).
    float normalized_i = (gl_GlobalInvocationID.x / height) - (width/height)*0.5;
    float normalized_j = (gl_GlobalInvocationID.y / height) - 0.5;

    // Point on the image plane relative to the camera position.
    vec3 image_point = normalized_i * camera_right +
                       normalized_j * camera_up +
                       camera_direction;
    vec3 ray_direction = normalize(image_point);

    imageStore(ray_dirbuffer, pixel, vec4(ray_direction, 0.0));
    imageStore(ray_posbuffer, pixel, vec4(camera_position, 0.0));

    // Only reset the accumulated color when the host asks for it.
    if (!clear_image) return;
    imageStore(output_image, pixel, vec4(0.0));
}
Intersection stage – Tests each ray against the meshes’ octree, traverses to the leaf nodes containing the triangles, and stores the hit data for that ray.
#version 430 core
// Closest-hit record for the ray handled by this invocation.
struct IsectData
{
float u; // barycentric weight of the triangle's second vertex (v1)
float v; // barycentric weight of the triangle's third vertex (v2)
float t; // ray parameter of the closest accepted hit; also the current upper bound for new hits
float i; // index in verts[] of the hit triangle's first vertex, stored as float; -1 means no hit
};
// Working state shared by RayVsAABB / RayVsTriangle; reset in main() for every ray.
IsectData data;
// One octree node as stored in OcTreeBuffer.
struct OcTree
{
vec3 _min; // first corner of the node's bounding box passed to RayVsAABB
int start; // index in verts[] of the first vertex of the node's triangles
vec3 _max; // second corner of the node's bounding box passed to RayVsAABB
int stop; // end of the node's vertex range (exclusive); triangles are read in steps of 3
vec3 padding; // not read by this shader
int childrenCount; // number of following ocs[] entries skipped when the ray misses this node's box
};
// One mesh vertex as stored in VertexBuffer. Only p is read in this stage;
// the remaining fields are declared so the struct matches the buffer contents.
struct Vertex
{
vec3 p; // position used for the ray/triangle test
float u;
vec3 n;
float v;
vec4 t;
vec4 b;
vec4 c;
};
// Work-group size: 16x16 invocations; main() uses gl_GlobalInvocationID.xy as the pixel coordinate.
layout (local_size_x = 16, local_size_y = 16) in;
// OcTrees: flattened node array walked linearly by RayVsOcTree.
layout (std430, binding = 1) buffer OcTreeBuffer { OcTree ocs []; };
// Vertices: three consecutive entries form one triangle.
layout (std140, binding = 2) buffer VertexBuffer { Vertex verts []; };
// input: ray direction and origin for this pixel
layout (rgba32f, binding = 3) uniform image2D ray_dirbuffer;
layout (rgba32f, binding = 4) uniform image2D ray_posbuffer;
// output of this stage: (u, v, t, first vertex index) of the closest hit
layout (rgba32f, binding = 5) uniform image2D hitbuffer;
// declared but not accessed by this stage
layout (rgba32f, binding = 7) uniform image2D output_image;
// Forward declarations; the definitions follow main().
void RayVsOcTree(vec3 ray_pos, vec3 ray_dir);
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1);
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float i);
// Intersection stage entry point.
// Loads this pixel's ray, finds the closest triangle hit through the octree
// and writes (u, v, t, first vertex index) to hitbuffer.
void main(void)
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    vec3 ray_d = imageLoad(ray_dirbuffer, pixel).xyz;
    vec3 ray_o = imageLoad(ray_posbuffer, pixel).xyz;

    // Start from "no hit" (i = -1). t = 30 is the search limit: RayVsTriangle
    // rejects any hit with t >= data.t, so nothing at or beyond 30 is reported.
    data = IsectData(0,0,30,-1);

    RayVsOcTree(ray_o, ray_d);

    imageStore(hitbuffer, pixel, vec4(data.u,data.v,data.t,data.i));
}
// Walks the flattened octree front to back in array order and tests the ray
// against the triangles of every node whose bounding box it hits. The closest
// hit is accumulated in the global `data` by RayVsTriangle.
void RayVsOcTree(vec3 ray_pos, vec3 ray_dir)
{
    int node = 0;
    while (node < ocs.length())
    {
        // Box missed: jump over this node and the childrenCount entries after it.
        if (!RayVsAABB(ray_pos, ray_dir, ocs[node]._min, ocs[node]._max))
        {
            node += ocs[node].childrenCount + 1;
            continue;
        }

        // Box hit: test the node's triangles; the range is empty when start >= stop.
        for (int tri = ocs[node].start; tri < ocs[node].stop; tri += 3)
        {
            RayVsTriangle(ray_pos, ray_dir, verts[tri], verts[tri+1], verts[tri+2], float(tri));
        }

        // Continue with the next entry.
        node++;
    }
}
// Slab test of a ray against the axis-aligned box [pbounds0, pbounds1].
// Returns false when the box is behind the ray, is missed, or is entered
// farther away than the closest hit found so far (data.t).
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1)
{
    vec3 inv = 1.0f / ray_dir;

    // Ray parameters at which the ray crosses the two planes of each axis.
    vec3 ta = (pbounds0 - ray_pos) * inv;
    vec3 tb = (pbounds1 - ray_pos) * inv;

    vec3 tnear = min(ta, tb);
    vec3 tfar = max(ta, tb);

    // Latest entry and earliest exit over the three slabs.
    float tmin = max(max(tnear.x, tnear.y), tnear.z);
    float tmax = min(min(tfar.x, tfar.y), tfar.z);

    return !(tmax < 0 || tmin > tmax || tmin > data.t);
}
// Ray/triangle intersection using the edge vectors e1, e2 and the barycentric
// coordinates (u, v). When the hit is in front of the ray origin and closer
// than the best hit so far, the global `data` is updated with (u, v, t, i).
//   ray_pos, ray_dir : ray origin and direction
//   v0, v1, v2       : triangle vertices (only .p is read)
//   i                : value recorded in data.i for this triangle
// Returns true when `data` was updated.
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float i)
{
    vec3 e1 = v1.p - v0.p;
    vec3 e2 = v2.p - v0.p;
    vec3 q = cross(ray_dir, e2);
    float a = dot(e1, q);
    // Ray parallel to the triangle plane (or degenerate triangle).
    if (a == 0) return false;
    float f = 1/a;
    vec3 s = ray_pos - v0.p;
    float u = f*(dot(s, q));
    if (u < 0 || u > 1) return false;
    vec3 p = cross(s, e1);
    float v = f*(dot(ray_dir, p));
    if (v < 0 || u + v > 1) return false;
    float t = dot(e2, p) * f;
    // Ignore hits behind or at the origin; the small offset keeps a ray that
    // starts on a surface from hitting that surface again.
    if (t < 0.0001) return false;
    // Keep only the closest hit.
    if (t >= data.t) return false;
    data.u = u;
    data.v = v;
    data.t = t;
    data.i = i;
    return true;
}
Color stage – Samples the texture and normal map at the hit position, then creates and stores a new ray for the reflection.
#version 430 core
#extension GL_EXT_texture_array : enable
#extension GL_NV_texture_array : enable
// One point light as stored in PointLightBuffer.
struct PointLight
{
vec4 Diffuse; // diffuse color, scaled by the diffuse factor and attenuation
vec4 Specular; // specular color, scaled by the specular factor and attenuation
vec3 Position; // light position; shadow rays start here
float Range; // attenuation is Range / distance; contributions below 0.01 are dropped
};
// Directional light; the only instance is built inline in main().
struct DirectionalLight
{
vec4 Diffuse; // diffuse color
vec4 Specular; // specular color
vec4 Direction; // only xyz is read: dotted with the normal and used as the shadow-ray direction from the hit point
};
// Per-object material record, as interpreted by GetTextures():
//   info.x <= vertex index < info.y selects the object,
//   info.z is returned as the texture id, info.w as the normal-map id.
struct Object
{
ivec4 info;
};
// One octree node as stored in OcTreeBuffer.
struct OcTree
{
vec3 _min; // first corner of the node's bounding box passed to RayVsAABB
int start; // index in verts[] of the first vertex of the node's triangles
vec3 _max; // second corner of the node's bounding box passed to RayVsAABB
int stop; // end of the node's vertex range (exclusive); triangles are read in steps of 3
vec3 padding; // not read by this shader
int childrenCount; // number of following ocs[] entries skipped when the ray misses this node's box
};
// One mesh vertex as stored in VertexBuffer; three consecutive entries form a triangle.
struct Vertex
{
vec3 p; // position, used by the ray/triangle test
float u; // texture coordinate (first component)
vec3 n; // normal, interpolated at the hit point
float v; // texture coordinate (second component)
vec4 t; // tangent (xyz read) for normal mapping
vec4 b; // bitangent (xyz read) for normal mapping
vec4 c; // color (xyz read) added to the sampled texture color
};
// Work-group size: 16x16 invocations; main() uses gl_GlobalInvocationID.xy as the pixel coordinate.
layout (local_size_x = 16, local_size_y = 16) in;
// Objects: per-object vertex ranges and texture ids, read by GetTextures().
// NOTE(review): objs[] was referenced but never declared in this shader.
// Binding 0 is an assumption (the only slot not used by the other resources);
// confirm it against the host-side buffer binding.
layout (std430, binding = 0) buffer ObjectBuffer { Object objs []; };
// AABB Buffer: flattened octree walked by RayVsOcTree.
layout (std430, binding = 1) buffer OcTreeBuffer { OcTree ocs []; };
// Vertices
layout (std140, binding = 2) buffer VertexBuffer { Vertex verts []; };
// Point Lights
layout (std430, binding = 6) buffer PointLightBuffer { PointLight lights []; };
// Texture array holding both color textures and normal maps (selected by layer).
uniform sampler2DArray v_Texture;
// Number of entries of lights[] to evaluate. GLSL spells the type `uint`;
// `unsigned` is a reserved word, not a type qualifier.
uniform uint nrofLights;
// IMG
// Ray of this pixel: read for shading, overwritten with the reflected ray.
layout (rgba32f, binding = 3) uniform image2D ray_dirbuffer;
layout (rgba32f, binding = 4) uniform image2D ray_posbuffer;
// Hit record (u, v, t, first vertex index) from the intersection stage.
layout (rgba32f, binding = 5) uniform image2D hitbuffer;
// output: accumulated color
layout (rgba32f, binding = 7) uniform image2D output_image;
// FUNCTIONS
void GetTextures(int vertexid, out int tex, out int norm);
// Prototype matches the definition below (third parameter is the maximum hit distance).
bool RayVsOcTree(vec3 ray_pos, vec3 ray_dir, float d);
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1);
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float d);
void ComputePointLight(PointLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec);
void ComputeDirectionalLight(DirectionalLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec);
// Color stage entry point.
// Shades the hit stored for this pixel (texture, optional normal map,
// directional and point lights with shadow rays), adds the result to
// output_image and replaces the pixel's ray with its reflection so the
// intersection stage can trace the next bounce.
void main(void)
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    // Sample from imagebuffers
    vec3 ray_d = imageLoad(ray_dirbuffer, pixel).xyz;
    vec3 ray_o = imageLoad(ray_posbuffer, pixel).xyz;
    vec4 d = imageLoad(hitbuffer, pixel);
    vec3 prev_color = imageLoad(output_image, pixel).xyz;

    // Unpack hit result: barycentrics, distance and first vertex of the triangle
    float u = d.x;
    float v = d.y;
    float t = d.z;
    int vertid = int(d.w);

    // If no hit do nothing
    if (vertid < 0) return;

    // Weight of the triangle's first vertex
    float w = 1 - u - v;

    // Get textures
    int textureid, normalid;
    GetTextures(vertid, textureid, normalid);

    // Calculate uv
    vec2 uv = w * vec2(verts[vertid].u, verts[vertid].v);
    uv += u * vec2(verts[vertid+1].u, verts[vertid+1].v);
    uv += v * vec2(verts[vertid+2].u, verts[vertid+2].v);

    // Sample the color texture. textureLod with an explicit level is used
    // because texture2DArray() is not part of the core profile and implicit
    // derivatives are undefined in compute shaders.
    vec3 color = textureLod(v_Texture, vec3(uv, textureid), 0.0).xyz + verts[vertid].c.xyz;

    // Interpolate the tangent frame at the hit point
    vec3 normal = w * verts[vertid].n + u * verts[vertid+1].n + v * verts[vertid+2].n;
    vec3 tangent = w * verts[vertid].t.xyz + u * verts[vertid+1].t.xyz + v * verts[vertid+2].t.xyz;
    vec3 bitang = w * verts[vertid].b.xyz + u * verts[vertid+1].b.xyz + v * verts[vertid+2].b.xyz;

    // Interpolation does not preserve length
    normal = normalize(normal);
    tangent = normalize(tangent);
    bitang = normalize(bitang);

    // Sample normal map (ids below 0 mean "no normal map")
    if ( normalid > -1 )
    {
        vec3 normal_map = textureLod(v_Texture, vec3(uv, normalid), 0.0).xyz;
        // Remap [0, 1] texel values to [-1, 1]
        normal_map = (normal_map * 2.0f) - 1.0f;
        mat3 texSpace = mat3(tangent, bitang, normal);
        normal = normalize(texSpace*normal_map);
    }

    // Reflected ray for the next bounce, starting at the hit point
    vec3 dir = reflect(ray_d, normal);
    vec3 pos = ray_o+ray_d*t;

    // LIGHT CALC
    // Constant base term added before any light is evaluated
    vec4 diffuse = vec4(0, 0.1, 0.2, 0);
    vec4 specular = vec4(0, 0, 0, 0);

    // Hardcoded directional light. The direction is normalized as a vec3 so
    // that xyz has unit length; normalizing the vec4 with w = 1 shortened it.
    DirectionalLight dirLight = DirectionalLight(
        vec4(0.2,0.2,0.1,1),
        vec4(0.2,0.2,0.2,1),
        vec4(normalize(vec3(1,0.2,0.5)), 0.0));
    vec4 Diff, Spec;
    ComputeDirectionalLight(dirLight, pos, normal, -ray_d, Diff, Spec);
    diffuse += Diff;
    specular += Spec;

    // Loop the point lights
    for (int i = 0; i < nrofLights; i++)
    {
        vec4 Diff, Spec;
        ComputePointLight(lights[i], pos, normal, -ray_d, Diff, Spec);
        diffuse += Diff;
        specular += Spec;
    }

    // Calculate new color from diffuse and specular
    color = (color*diffuse.xyz) + specular.xyz;

    // Mark the hit as consumed and store the reflected ray
    imageStore(hitbuffer, pixel, vec4(-1.0));
    imageStore(ray_dirbuffer, pixel, vec4(dir, 0.0));
    imageStore(ray_posbuffer, pixel, vec4(pos, 0.0));

    // A pixel that is still exactly black receives the full contribution;
    // otherwise the new color is blended in at half weight.
    float reflectivity = 0.5f;
    if (prev_color.x == 0 && prev_color.y == 0 && prev_color.z == 0)
        reflectivity = 1.0f;

    imageStore(output_image, pixel, vec4(color*reflectivity+prev_color, 0.0));
}
// Looks up the texture layers of the object that owns a vertex.
//   vertexid : index into verts[]
//   tex      : receives info.z of the first object with info.x <= vertexid < info.y
//   norm     : receives info.w of that object
// When no object range contains the vertex, tex is 0 and norm is -1 (which the
// caller treats as "no normal map"), so the outputs are never left undefined.
void GetTextures(int vertexid, out int tex, out int norm)
{
    tex = 0;
    norm = -1;

    for (int i = 0; i < objs.length(); i++)
    {
        if (objs[i].info.x <= vertexid && objs[i].info.y > vertexid)
        {
            tex = objs[i].info.z;
            norm = objs[i].info.w;
            return;
        }
    }
}
// Shadow query: walks the flattened octree in array order and returns true as
// soon as any triangle blocks the ray within the distance limit d.
bool RayVsOcTree(vec3 ray_pos, vec3 ray_dir, float d)
{
    int node = 0;
    while (node < ocs.length())
    {
        // Box missed: jump over this node and the childrenCount entries after it.
        if (!RayVsAABB(ray_pos, ray_dir, ocs[node]._min, ocs[node]._max))
        {
            node += ocs[node].childrenCount + 1;
            continue;
        }

        // Box hit: test the node's triangles; the range is empty when start >= stop.
        for (int tri = ocs[node].start; tri < ocs[node].stop; tri += 3)
        {
            if (RayVsTriangle(ray_pos, ray_dir, verts[tri], verts[tri+1], verts[tri+2], d))
                return true;
        }

        // Continue with the next entry.
        node++;
    }
    return false;
}
// Slab test of a ray against the axis-aligned box [pbounds0, pbounds1].
// Returns false when the box lies behind the ray or is missed.
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1)
{
    vec3 inv = 1.0f / ray_dir;

    // Ray parameters at which the ray crosses the two planes of each axis.
    vec3 ta = (pbounds0 - ray_pos) * inv;
    vec3 tb = (pbounds1 - ray_pos) * inv;

    vec3 tnear = min(ta, tb);
    vec3 tfar = max(ta, tb);

    // Latest entry and earliest exit over the three slabs.
    float tmin = max(max(tnear.x, tnear.y), tnear.z);
    float tmax = min(min(tfar.x, tfar.y), tfar.z);

    return !(tmax < 0 || tmin > tmax);
}
// Occlusion test of a ray against one triangle.
//   ray_pos, ray_dir : ray origin and direction
//   v0, v1, v2       : triangle vertices (only .p is read)
//   d                : distance limit; hits at or beyond d - skin are ignored
// Returns true when the triangle is hit strictly between the two skin offsets.
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float d)
{
    vec3 e1 = v1.p - v0.p;
    vec3 e2 = v2.p - v0.p;
    vec3 q = cross(ray_dir, e2);
    float a = dot(e1, q);
    // Ray parallel to the triangle plane (or degenerate triangle).
    if (a == 0) return false;
    float f = 1/a;
    vec3 s = ray_pos - v0.p;
    float u = f*(dot(s, q));
    if (u < 0 || u > 1) return false;
    vec3 p = cross(s, e1);
    float v = f*(dot(ray_dir, p));
    if (v < 0 || u + v > 1) return false;
    float t = dot(e2, p) * f;
    // The skin keeps the surfaces at both ends of the ray (its origin and the
    // point at distance d) from counting as occluders.
    float skin = 0.0001;
    if (t < skin) return false;
    if (t >= d-skin) return false;
    return true;
}
// Diffuse and specular contribution of a directional light at a surface point.
//   L       : light; L.Direction.xyz is dotted with the normal and used as the shadow-ray direction
//   pos     : shaded surface point (shadow-ray origin)
//   normal  : surface normal at pos
//   toEye   : direction from pos back along the viewing ray
//   diffuse, spec : outputs; both zero when the surface faces away or is in shadow
void ComputeDirectionalLight(DirectionalLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec)
{
    diffuse = vec4(0.0f);
    spec = vec4(0.0f);

    float diffuseFactor = dot(L.Direction.xyz, normal);

    // The facing test runs first: a surface turned away from the light gets no
    // contribution either way, so the octree shadow ray (limit 20) is only
    // traced when it can change the result.
    if( diffuseFactor > 0.0f && !RayVsOcTree(pos,L.Direction.xyz,20.f) )
    {
        // diffuse
        diffuse = diffuseFactor * L.Diffuse;
        // specular (fixed exponent 0.1 and intensity 0.1)
        vec3 v = reflect(-L.Direction.xyz, normal);
        float specFactor = pow(max(dot(v, toEye), 0.0f), 0.1f) * 0.1;
        spec = specFactor * L.Specular;
    }
}
// Diffuse and specular contribution of a point light at a surface point.
//   L       : light (position, colors, range)
//   pos     : shaded surface point
//   normal  : surface normal at pos
//   toEye   : direction from pos back along the viewing ray
//   diffuse, spec : outputs; both zero when the light is too weak, the surface
//                   faces away, or the light is occluded
void ComputePointLight(PointLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec)
{
    diffuse = vec4(0.0f);
    spec = vec4(0.0f);

    vec3 lightVec = L.Position - pos;
    float d = length(lightVec);
    // Light sits exactly on the surface point: no direction can be formed.
    if ( d <= 0.0f )
        return;
    lightVec /= d;

    // Cheap rejections first; each of them yields a zero contribution no matter
    // what the shadow ray would report, so the octree traversal is skipped.
    float att = (L.Range / d);
    if ( att < 0.01f )
        return;

    float diffuseFactor = dot(lightVec, normal);

    // Shadow ray runs from the light towards the surface point, limited to d.
    if( diffuseFactor > 0.0f && !RayVsOcTree(L.Position,-lightVec,d) )
    {
        // diffuse
        diffuse = diffuseFactor * L.Diffuse;
        diffuse *= att;
        // specular (fixed exponent 0.1 and intensity 0.1)
        vec3 v = reflect(-lightVec, normal);
        float specFactor = pow(max(dot(v, toEye), 0.0f), 0.1f) * 0.1;
        spec = specFactor * L.Specular;
        spec *= att;
    }
}
The intersection and color stages can be looped X times to create reflections.
Additional Features
- Picking – Implementation of a shader for calculating ray intersections against all vertices and highlighting the intersected one.
- Support for both Directional and Point lights.
- Normal mapping.
- Support for multiple materials per object.
- Support for multiple objects.
- Partitioning of the mesh into an octree that is traversed on the GPU.