In this project I built a GPGPU-based ray tracer using OpenGL 4.4 and compute shaders. The ray tracer consists of a three-stage pipeline.
Ray creation stage – Creates the rays from the camera's look, up and right vectors.
#version 430 core
// Ray creation stage: uniforms and image bindings used by main() below.
// Camera vectors supplied by the application. camera_right and camera_up
// scale the per-pixel offsets in main(); camera_position is used as the ray origin.
uniform vec3 camera_right;
uniform vec3 camera_up;
uniform vec3 camera_position;
// NOTE(review): camera_direction is not referenced in the visible body of
// main(); its use appears to have been lost from the listing.
uniform vec3 camera_direction;
// main() returns early at its last statement when this is false.
uniform bool clear_image;
// Work group size: 16 x 16 invocations.
layout (local_size_x = 16, local_size_y = 16) in;
// IMG
// rgba32f images at bindings 3, 4 and 7. The intersection and colour stages
// declare images with the same names at the same bindings.
layout (rgba32f, binding = 3) uniform image2D ray_dirbuffer;
layout (rgba32f, binding = 4) uniform image2D ray_posbuffer;
layout (rgba32f, binding = 7) uniform image2D output_image;
// Ray creation entry point: builds the primary ray for the pixel addressed by
// gl_GlobalInvocationID.xy and writes it to ray_dirbuffer / ray_posbuffer.
//
// NOTE(review): the published listing lost its braces, the imageStore() call
// heads, the last term of image_point and the statement after the
// clear_image check. They are reconstructed here from what remains: the two
// orphaned vec4(...) values, the otherwise unused camera_direction uniform,
// and the colour stage treating an all-zero output pixel as "first bounce".
// Confirm against the original shader.
void main(void)
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    // Image size in pixels, derived from the dispatch dimensions.
    float width  = float(gl_WorkGroupSize.x * gl_NumWorkGroups.x); // pixels across
    float height = float(gl_WorkGroupSize.y * gl_NumWorkGroups.y); // pixels high

    // Both axes are divided by height, so one unit spans the same number of
    // pixels horizontally and vertically; the subtraction centres the image.
    float normalized_i = (float(gl_GlobalInvocationID.x) / height) - (width / height) * 0.5;
    float normalized_j = (float(gl_GlobalInvocationID.y) / height) - 0.5;

    // Point on the image plane relative to the camera position.
    vec3 image_point = normalized_i * camera_right +
                       normalized_j * camera_up +
                       camera_direction;
    vec3 ray_direction = normalize(image_point);

    imageStore(ray_dirbuffer, pixel, vec4(ray_direction, 0.0));
    imageStore(ray_posbuffer, pixel, vec4(camera_position, 0.0));

    if (!clear_image) return;

    // Reset the accumulated colour for a new frame.
    imageStore(output_image, pixel, vec4(0.0));
}
Intersection stage – Tests each ray against the meshes' octree, traverses down to the leaf nodes containing the triangles, and stores the hit data for that ray.
#version 430 core
// Closest-hit record for one ray (the listing had lost the struct braces).
struct IsectData
{
    float u; // first barycentric weight of the hit, written by RayVsTriangle()
    float v; // second barycentric weight of the hit
    float t; // ray parameter of the hit; also the upper bound for later tests
    float i; // index of the hit triangle's first vertex, stored as a float
};
// Per-invocation hit record: reset in main(), updated by RayVsTriangle() and
// read by RayVsAABB() to reject boxes beyond the current closest hit.
IsectData data;
// One octree node as laid out in OcTreeBuffer (the listing had lost the braces).
struct OcTree
{
    vec3 _min;         // minimum corner of the node's bounding box
    int start;         // first vertex index of the node's triangles
    vec3 _max;         // maximum corner of the node's bounding box
    int stop;          // end of the node's vertex range (exclusive)
    vec3 padding;      // not read by this shader
    int childrenCount; // added to the node index to skip this node's subtree
};
// One mesh vertex as laid out in VertexBuffer (the listing had lost the braces).
// Three consecutive vertices form one triangle.
struct Vertex
{
    vec3 p;  // position, used for the triangle edges in RayVsTriangle()
    float u; // not read in this stage
    vec3 n;  // not read in this stage
    float v; // not read in this stage
    vec4 t;  // not read in this stage; presumably tangent — TODO confirm
    vec4 b;  // not read in this stage; presumably bitangent — TODO confirm
    vec4 c;  // not read in this stage; presumably colour — TODO confirm
};
// Work group size: 16 x 16 invocations.
layout (local_size_x = 16, local_size_y = 16) in;
// OcTrees
// Octree nodes, scanned linearly by RayVsOcTree().
layout (std430, binding = 1) buffer OcTreeBuffer { OcTree ocs []; };
// Vertices
// Triangle vertices, indexed by the start/stop range of each octree node.
layout (std140, binding = 2) buffer VertexBuffer { Vertex verts []; };
// input
// Ray direction and origin per pixel, read in main().
layout (rgba32f, binding = 3) uniform image2D ray_dirbuffer;
layout (rgba32f, binding = 4) uniform image2D ray_posbuffer;
// NOTE(review): hitbuffer is listed under "input" but is never read in the
// visible code of this stage; presumably it receives the hit record — confirm.
layout (rgba32f, binding = 5) uniform image2D hitbuffer;
// output
// NOTE(review): output_image is not referenced in the visible code of this stage.
layout (rgba32f, binding = 7) uniform image2D output_image;
// Forward declarations of the intersection helpers defined after main().
void RayVsOcTree(vec3 ray_pos, vec3 ray_dir);
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1);
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float i);
// Intersection entry point: loads this pixel's ray, finds its closest triangle
// hit and stores the hit record.
//
// NOTE(review): the published listing lost the braces and never wrote `data`
// anywhere. The imageStore() below is reconstructed from the colour stage,
// which reads hitbuffer as (u, v, t, vertex index) and treats a negative
// index as "no hit". Confirm against the original shader.
void main(void)
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    vec3 ray_d = imageLoad(ray_dirbuffer, pixel).xyz;
    vec3 ray_o = imageLoad(ray_posbuffer, pixel).xyz;

    // No hit yet: index -1, and 30 as the maximum accepted ray parameter.
    data = IsectData(0.0, 0.0, 30.0, -1.0);

    RayVsOcTree(ray_o, ray_d);

    imageStore(hitbuffer, pixel, vec4(data.u, data.v, data.t, data.i));
}
// Tests the ray against every octree node in buffer order and records the
// closest triangle hit in the global `data` (via RayVsTriangle).
//
// ray_pos  ray origin
// ray_dir  ray direction
//
// NOTE(review): the published listing lost its braces and, by the look of it,
// the `else` in front of the skip. Without it the subtree of every node would
// be skipped even after its box was hit, so no child could be reached.
// Confirm against the original shader.
void RayVsOcTree(vec3 ray_pos, vec3 ray_dir)
{
    for (int i = 0; i < ocs.length(); i++)
    {
        // Check collision
        if (RayVsAABB(ray_pos, ray_dir, ocs[i]._min, ocs[i]._max))
        {
            // Box hit: test the node's triangles, three vertices at a time.
            // The loop then continues into the nodes that follow.
            for (int j = ocs[i].start; j < ocs[i].stop; j += 3)
            {
                RayVsTriangle(ray_pos, ray_dir, verts[j], verts[j + 1], verts[j + 2], float(j));
            }
        }
        else
        {
            // Box missed: jump over this node's subtree.
            i += ocs[i].childrenCount;
        }
    }
}
// Slab test of a ray against an axis-aligned box.
//
// ray_pos   ray origin
// ray_dir   ray direction
// pbounds0  minimum corner of the box
// pbounds1  maximum corner of the box
//
// Returns false when the box lies behind the ray, is missed, or is entered
// beyond the closest hit found so far (data.t); true otherwise.
//
// The listing read `1.0f /;` with the divisor stripped; ray_dir is restored,
// as required by the invdir.x/.y/.z uses that followed.
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1)
{
    vec3 invdir = 1.0f / ray_dir;

    // Ray parameters at which the ray crosses the two planes of each axis.
    vec3 t0 = (pbounds0 - ray_pos) * invdir;
    vec3 t1 = (pbounds1 - ray_pos) * invdir;

    vec3 tnear = min(t0, t1);
    vec3 tfar  = max(t0, t1);

    float tmin = max(max(tnear.x, tnear.y), tnear.z); // latest entry
    float tmax = min(min(tfar.x, tfar.y), tfar.z);    // earliest exit

    if (tmax < 0 || tmin > tmax || tmin > data.t)
        return false;
    return true;
}
// Ray/triangle intersection. On a hit closer than the one stored in the
// global `data`, overwrites `data` with the new hit.
//
// ray_pos, ray_dir  ray origin and direction
// v0, v1, v2        triangle vertices (only .p is read)
// i                 value stored in data.i on a hit (the caller passes the
//                   index of v0)
//
// Returns true only when `data` was updated.
// (The published listing had lost the function braces; they are restored.)
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float i)
{
    vec3 e1 = v1.p - v0.p;
    vec3 e2 = v2.p - v0.p;

    vec3 q = cross(ray_dir, e2);
    float a = dot(e1, q);
    if (a == 0) return false; // ray parallel to the triangle plane

    float f = 1 / a;
    vec3 s = ray_pos - v0.p;

    float u = f * (dot(s, q));
    if (u < 0 || u > 1) return false;

    vec3 p = cross(s, e1);
    float v = f * (dot(ray_dir, p));
    if (v < 0 || u + v > 1) return false;

    float t = dot(e2, p) * f;
    if (t < 0.0001) return false;  // behind or at the ray origin
    if (t >= data.t) return false; // not closer than the current hit

    data.u = u;
    data.v = v;
    data.t = t;
    data.i = i;
    return true;
}
Color stage – Samples the texture and normal map at the hit position, then creates and stores a new ray for the reflection.
#version 430 core
#extension GL_EXT_texture_array : enable
#extension GL_NV_texture_array : enable
// One point light as laid out in PointLightBuffer (the listing had lost the braces).
struct PointLight
{
    vec4 Diffuse;  // diffuse colour, scaled by N.L and attenuation
    vec4 Specular; // specular colour, scaled by the specular factor and attenuation
    vec3 Position; // light position
    float Range;   // attenuation is computed as Range / distance
};
// Directional light description (the listing had lost the braces).
struct DirectionalLight
{
    vec4 Diffuse;   // diffuse colour
    vec4 Specular;  // specular colour
    vec4 Direction; // light direction; main() fills it with a normalized vec4
};
// Per-object material record (the listing had lost the braces).
// As read by GetTextures(): info.x <= vertex index < info.y selects the
// object, info.z is its texture layer and info.w its normal-map layer.
struct Object
{
    ivec4 info;
};
// One octree node as laid out in OcTreeBuffer (the listing had lost the braces).
struct OcTree
{
    vec3 _min;         // minimum corner of the node's bounding box
    int start;         // first vertex index of the node's triangles
    vec3 _max;         // maximum corner of the node's bounding box
    int stop;          // end of the node's vertex range (exclusive)
    vec3 padding;      // not read by this shader
    int childrenCount; // added to the node index to skip this node's subtree
};
// One mesh vertex as laid out in VertexBuffer (the listing had lost the braces).
// Three consecutive vertices form one triangle.
struct Vertex
{
    vec3 p;  // position
    float u; // texture coordinate u
    vec3 n;  // normal
    float v; // texture coordinate v
    vec4 t;  // presumably tangent (the reads in main() were stripped) — TODO confirm
    vec4 b;  // presumably bitangent — TODO confirm
    vec4 c;  // presumably vertex colour — TODO confirm
};
// Colour stage: buffers, images, uniforms and forward declarations.
// Work group size: 16 x 16 invocations.
layout (local_size_x = 16, local_size_y = 16) in;
// AABB Buffer
layout (std430, binding = 1) buffer OcTreeBuffer { OcTree ocs []; };
// Vertices
layout (std140, binding = 2) buffer VertexBuffer { Vertex verts []; };
// Point Lights
layout (std430, binding = 6) buffer PointLightBuffer { PointLight lights []; };
// NOTE(review): GetTextures() reads an array `objs` of Object that is not
// declared anywhere in this listing. Its buffer declaration appears to be
// missing; the binding point cannot be recovered from here.
// Texture
// Array texture holding both colour layers and normal-map layers.
uniform sampler2DArray v_Texture;
// Number of entries of `lights` to evaluate.
// `unsigned int` is not a GLSL type (`unsigned` is a reserved word); the
// unsigned integer type is spelled `uint`.
uniform uint nrofLights;
// IMG
layout (rgba32f, binding = 3) uniform image2D ray_dirbuffer;
layout (rgba32f, binding = 4) uniform image2D ray_posbuffer;
layout (rgba32f, binding = 5) uniform image2D hitbuffer;
// output
layout (rgba32f, binding = 7) uniform image2D output_image;
void GetTextures(int vertexid, out int tex, out int norm);
// Third parameter is the maximum ray length (float), matching the definition
// and both call sites. The previous `int ocid` prototype declared an overload
// that was never defined.
bool RayVsOcTree(vec3 ray_pos, vec3 ray_dir, float d);
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1);
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float d);
void ComputePointLight(PointLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec);
void ComputeDirectionalLight(DirectionalLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec);
// Colour entry point: shades this pixel's hit, accumulates the result into
// output_image and replaces the pixel's ray with its reflection so that the
// intersection and colour stages can be run again for the next bounce.
//
// NOTE(review): the published listing lost its braces, every `.xyz`-suffixed
// operand and the imageStore() call heads. Reconstructed here:
//   - `.c.xyz`, `.t.xyz`, `.b.xyz` on the vertex reads (the only vec4 members
//     of Vertex; t/b chosen to match the `tangent`/`bitang` variable names);
//   - `color * diffuse.xyz + specular.xyz` for the colour combine;
//   - the three stores, matched to the orphaned vec4(...) values.
// Confirm against the original shader.
void main(void)
{
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    // Sample from imagebuffers
    vec3 ray_d = imageLoad(ray_dirbuffer, pixel).xyz;
    vec3 ray_o = imageLoad(ray_posbuffer, pixel).xyz;
    vec4 d = imageLoad(hitbuffer, pixel);
    vec3 prev_color = imageLoad(output_image, pixel).xyz;

    // Unpack hit result
    float u = d.x;
    float v = d.y;
    float t = d.z;
    int vertid = int(d.w);

    // If no hit do nothing
    if (vertid < 0) return;

    // Get textures
    int textureid, normalid;
    GetTextures(vertid, textureid, normalid);

    // Barycentric weight of the triangle's first vertex.
    float w = 1.0 - u - v;

    // Interpolate uv
    vec2 uv = w * vec2(verts[vertid].u, verts[vertid].v);
    uv += u * vec2(verts[vertid + 1].u, verts[vertid + 1].v);
    uv += v * vec2(verts[vertid + 2].u, verts[vertid + 2].v);

    // Sample texture and interpolate the tangent frame.
    // texture() is the core-profile lookup for sampler2DArray; it replaces
    // the extension-only texture2DArray().
    vec3 color = texture(v_Texture, vec3(uv, textureid)).xyz + verts[vertid].c.xyz;
    vec3 normal  = w * verts[vertid].n     + u * verts[vertid + 1].n     + v * verts[vertid + 2].n;
    vec3 tangent = w * verts[vertid].t.xyz + u * verts[vertid + 1].t.xyz + v * verts[vertid + 2].t.xyz;
    vec3 bitang  = w * verts[vertid].b.xyz + u * verts[vertid + 1].b.xyz + v * verts[vertid + 2].b.xyz;

    // Interpolated vectors are not unit length in general.
    normal = normalize(normal);
    tangent = normalize(tangent);
    bitang = normalize(bitang);

    // sample normal map
    if (normalid > -1)
    {
        vec3 normal_map = texture(v_Texture, vec3(uv, normalid)).xyz;
        normal_map = (normal_map * 2.0f) - 1.0f; // [0,1] -> [-1,1]
        mat3 texSpace = mat3(tangent, bitang, normal);
        normal = normalize(texSpace * normal_map);
    }

    // calculate new direction of ray (mirror reflection about the normal)
    vec3 dir = ray_d - dot(ray_d, normal) * 2 * normal;
    // calculate new position of ray (the hit point)
    vec3 pos = ray_o + ray_d * t;

    // The diffuse accumulator starts at a constant base term.
    vec4 diffuse = vec4(0, 0.1, 0.2, 0);
    vec4 specular = vec4(0, 0, 0, 0);

    // hardcoded directional light
    // NOTE(review): normalizing the vec4 including w = 1 leaves xyz shorter
    // than unit length — confirm whether that is intended.
    DirectionalLight dirLight = DirectionalLight(vec4(0.2, 0.2, 0.1, 1), vec4(0.2, 0.2, 0.2, 1), normalize(vec4(1, 0.2, 0.5, 1)));
    vec4 Diff, Spec;
    ComputeDirectionalLight(dirLight, pos, normal, -ray_d, Diff, Spec);
    diffuse += Diff;
    specular += Spec;

    // loop the pointlights
    for (int i = 0; i < int(nrofLights); i++)
    {
        ComputePointLight(lights[i], pos, normal, -ray_d, Diff, Spec);
        diffuse += Diff;
        specular += Spec;
    }

    // calculate new color from diffuse and specular
    color = color * diffuse.xyz + specular.xyz;

    // Store the reflected ray for the next bounce
    imageStore(ray_dirbuffer, pixel, vec4(dir, 0.0));
    imageStore(ray_posbuffer, pixel, vec4(pos, 0.0));

    // A pixel that is still all zero takes this bounce at full weight;
    // otherwise the bounce is added at half weight.
    float reflectivity = 0.5f;
    if (prev_color.x == 0 && prev_color.y == 0 && prev_color.z == 0)
        reflectivity = 1.0f;

    imageStore(output_image, pixel, vec4(color * reflectivity + prev_color, 0.0));
}
// Looks up the texture layers of the object that owns a vertex.
//
// vertexid  vertex index to look up
// tex       out: info.z of the matching object, or -1 when none matches
// norm      out: info.w of the matching object, or -1 when none matches
//
// An object matches when info.x <= vertexid < info.y. If several match, the
// last one wins, as in the original loop.
//
// The out parameters were previously left undefined when no object matched;
// they now default to -1, which main() already treats as "no normal map".
// (The published listing had also lost its braces; they are restored.)
//
// NOTE(review): `objs` is not declared anywhere in this listing; its buffer
// declaration appears to be missing.
void GetTextures(int vertexid, out int tex, out int norm)
{
    tex = -1;
    norm = -1;
    for (int i = 0; i < objs.length(); i++)
    {
        if (objs[i].info.x <= vertexid)
        {
            if (objs[i].info.y > vertexid)
            {
                tex = objs[i].info.z;
                norm = objs[i].info.w;
            }
        }
    }
}
// Occlusion query: reports whether any triangle is hit by the ray within
// distance d.
//
// ray_pos  ray origin
// ray_dir  ray direction
// d        maximum ray parameter, forwarded to RayVsTriangle()
//
// Returns true at the first triangle hit, false when nothing is hit.
//
// NOTE(review): the published listing lost its braces and, by the look of it,
// the `else` in front of the skip. Without it the subtree of every node would
// be skipped even after its box was hit. Confirm against the original shader.
bool RayVsOcTree(vec3 ray_pos, vec3 ray_dir, float d)
{
    for (int i = 0; i < ocs.length(); i++)
    {
        // Check collision
        if (RayVsAABB(ray_pos, ray_dir, ocs[i]._min, ocs[i]._max))
        {
            // Box hit: test the node's triangles, three vertices at a time.
            for (int j = ocs[i].start; j < ocs[i].stop; j += 3)
            {
                if (RayVsTriangle(ray_pos, ray_dir, verts[j], verts[j + 1], verts[j + 2], d))
                    return true;
            }
        }
        else
        {
            // Box missed: jump over this node's subtree.
            i += ocs[i].childrenCount;
        }
    }
    return false;
}
// Slab test of a ray against an axis-aligned box.
//
// ray_pos   ray origin
// ray_dir   ray direction
// pbounds0  minimum corner of the box
// pbounds1  maximum corner of the box
//
// Returns false when the box lies entirely behind the ray or is missed,
// true otherwise.
//
// The listing read `1.0f /;` with the divisor stripped; ray_dir is restored,
// as required by the invdir.x/.y/.z uses that followed.
bool RayVsAABB(vec3 ray_pos, vec3 ray_dir, vec3 pbounds0, vec3 pbounds1)
{
    vec3 invdir = 1.0f / ray_dir;

    // Ray parameters at which the ray crosses the two planes of each axis.
    vec3 t0 = (pbounds0 - ray_pos) * invdir;
    vec3 t1 = (pbounds1 - ray_pos) * invdir;

    vec3 tnear = min(t0, t1);
    vec3 tfar  = max(t0, t1);

    float tmin = max(max(tnear.x, tnear.y), tnear.z); // latest entry
    float tmax = min(min(tfar.x, tfar.y), tfar.z);    // earliest exit

    if (tmax < 0 || tmin > tmax)
        return false;
    return true;
}
// Ray/triangle intersection used for occlusion queries.
//
// ray_pos, ray_dir  ray origin and direction
// v0, v1, v2        triangle vertices (only .p is read)
// d                 maximum ray parameter
//
// Returns true when the ray hits the triangle at a parameter t with
// skin <= t < d - skin. The margin keeps hits at the ray origin and at the
// far end point from counting.
// (The published listing had lost the function braces; they are restored.)
bool RayVsTriangle(vec3 ray_pos, vec3 ray_dir, Vertex v0, Vertex v1, Vertex v2, float d)
{
    vec3 e1 = v1.p - v0.p;
    vec3 e2 = v2.p - v0.p;

    vec3 q = cross(ray_dir, e2);
    float a = dot(e1, q);
    if (a == 0) return false; // ray parallel to the triangle plane

    float f = 1 / a;
    vec3 s = ray_pos - v0.p;

    float u = f * (dot(s, q));
    if (u < 0 || u > 1) return false;

    vec3 p = cross(s, e1);
    float v = f * (dot(ray_dir, p));
    if (v < 0 || u + v > 1) return false;

    float t = dot(e2, p) * f;
    float skin = 0.0001;
    if (t < skin) return false;
    if (t >= d - skin) return false;
    return true;
}
// Diffuse and specular contribution of a directional light at a surface point.
//
// L        the light
// pos      surface point
// normal   surface normal at pos
// toEye    direction from pos towards the viewer
// diffuse  out: diffuse term (zero when shadowed or facing away)
// spec     out: specular term (zero when shadowed or facing away)
//
// NOTE(review): the published listing lost its braces, the early `return`
// after the shadow test and every `L.Direction.xyz` operand (`pos,,20.f`,
// `dot(, normal)`, `reflect(, normal)`). They are reconstructed by analogy
// with ComputePointLight(), where the light vector points from the surface
// towards the light and reflect() receives its negation. Confirm the sign
// convention against the original shader.
void ComputeDirectionalLight(DirectionalLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec)
{
    diffuse = vec4(0.0f);
    spec = vec4(0.0f);

    vec3 toLight = L.Direction.xyz;

    // Shadow ray from the surface towards the light, limited to 20 units.
    if (RayVsOcTree(pos, toLight, 20.f))
        return;

    float diffuseFactor = dot(toLight, normal);
    if (diffuseFactor > 0.0f)
    {
        // diffuse
        diffuse = diffuseFactor * L.Diffuse;
        // specular (exponent and intensity are hard-coded)
        vec3 v = reflect(-toLight, normal);
        float specFactor = pow(max(dot(v, toEye), 0.0f), 0.1f) * 0.1;
        spec = specFactor * L.Specular;
    }
}
// Diffuse and specular contribution of a point light at a surface point.
//
// L        the light
// pos      surface point
// normal   surface normal at pos
// toEye    direction from pos towards the viewer
// diffuse  out: attenuated diffuse term
// spec     out: attenuated specular term
//
// Both outputs are zero when the point is shadowed, the attenuation falls
// below 0.01, or the surface faces away from the light.
//
// NOTE(review): the published listing lost its braces and the bare `return;`
// lines after the two early-out tests; they are restored here. Placing the
// specular term inside the `diffuseFactor > 0` branch is an assumption —
// confirm against the original shader.
void ComputePointLight(PointLight L, vec3 pos, vec3 normal, vec3 toEye, out vec4 diffuse, out vec4 spec)
{
    diffuse = vec4(0.0f);
    spec = vec4(0.0f);

    vec3 lightVec = L.Position - pos;
    float d = length(lightVec);
    // A light exactly at the surface point has no direction; avoid dividing by zero.
    if (d <= 0.0f)
        return;
    lightVec /= d;

    // Shadow ray from the light towards the surface point, limited to d.
    if (RayVsOcTree(L.Position, -lightVec, d))
        return;

    float att = (L.Range / d);
    if (att < 0.01f)
        return;

    float diffuseFactor = dot(lightVec, normal);
    if (diffuseFactor > 0.0f)
    {
        // diffuse
        diffuse = diffuseFactor * L.Diffuse;
        diffuse *= att;
        // specular (exponent and intensity are hard-coded)
        vec3 v = reflect(-lightVec, normal);
        float specFactor = pow(max(dot(v, toEye), 0.0f), 0.1f) * 0.1;
        spec = specFactor * L.Specular;
        spec *= att;
    }
}
The intersection and color stages can be looped X times to create reflections.
Additional Features
- Picking – Implementation of a shader for calculating ray intersections against all vertices and highlighting the intersected one.
- Support for both Directional and Point lights.
- Normal mapping.
- Support for multiple materials per object.
- Support for multiple objects.
- Partitioning of the mesh into an octree that is traversed on the GPU.