I have been following this lesson on implementing a particle system.
I am trying to bring the particle system into a 3D scene.
My entry point and initialization look like this:
bool initOpenGL()
// Intialize GLFW
// GLFW is configured. Must be called before calling any GLFW functions
if (!glfwInit())
// An error occured
std::cerr << "GLFW initialization failed" << std::endl;
return false;
glfwWindowHint(GLFW_SAMPLES, 4);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // forward compatible with newer versions of OpenGL as they become available but not backward compatible (it will not run on devices that do not support OpenGL 3.3
// Create an OpenGL 3.3 core, forward compatible context window
gWindow = glfwCreateWindow(gWindowWidth, gWindowHeight, APP_TITLE, NULL, NULL);
if (gWindow == NULL)
std::cerr << "Failed to create GLFW window" << std::endl;
return false;
// Make the window's context the current one
// Initialize GLEW
glewExperimental = GL_TRUE;
if (glewInit() != GLEW_OK)
std::cerr << "Failed to initialize GLEW" << std::endl;
return false;
// Set the required callback functions
glfwSetKeyCallback(gWindow, glfw_onKey);
glfwSetFramebufferSizeCallback(gWindow, glfw_onFramebufferSize);
glfwSetScrollCallback(gWindow, glfw_onMouseScroll);
glClearColor(gClearColor.r, gClearColor.g, gClearColor.b, gClearColor.a);
// Define the viewport dimensions
glViewport(0, 0, gWindowWidth, gWindowHeight);
return true;
Trying to render both the particle system and 3D scene composed of meshes in the same scene like:
// One-time setup of the particle system: shader program, uniform locations,
// CPU-side particle pool, texture, and the three vertex buffers.
glGenVertexArrays(1, &VertexArrayID);

programID = LoadShaders("shaders/Particle.vertexshader", "shaders/Particle.fragmentshader");

// Uniform locations are looked up once and reused every frame.
CameraRight_worldspace_ID = glGetUniformLocation(programID, "CameraRight_worldspace");
CameraUp_worldspace_ID = glGetUniformLocation(programID, "CameraUp_worldspace");
ViewProjMatrixID = glGetUniformLocation(programID, "VP");
TextureID = glGetUniformLocation(programID, "myTextureSampler");

// Start with every particle dead (negative life / camera distance).
for (int i = 0; i < MaxParticles; i++)
{
    ParticlesContainer[i].life = -1.0f;
    ParticlesContainer[i].cameradistance = -1.0f;
}

Texture = loadDDS("textures/particle.DDS");

// The VBO containing the vertices of the billboard quad shared by all particles
glGenBuffers(1, &billboard_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, billboard_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW);

// The VBO containing the positions and sizes of the particles
glGenBuffers(1, &particles_position_buffer);
glBindBuffer(GL_ARRAY_BUFFER, particles_position_buffer);
// Initialize with empty (null) buffer : it will be updated later, each frame.
glBufferData(GL_ARRAY_BUFFER, MaxParticles * 4 * sizeof(GLfloat), nullptr, GL_STREAM_DRAW);

// The VBO containing the colors of the particles
glGenBuffers(1, &particles_color_buffer);
glBindBuffer(GL_ARRAY_BUFFER, particles_color_buffer);
// Initialize with empty (null) buffer : it will be updated later, each frame.
glBufferData(GL_ARRAY_BUFFER, MaxParticles * 4 * sizeof(GLubyte), nullptr, GL_STREAM_DRAW);
// Main loop. Each frame: spawn and simulate particles on the CPU, stream their
// position/size and colour arrays into the per-instance buffers, draw all
// billboards with one instanced call, then draw the lit meshes.
while (!glfwWindowShouldClose(gWindow))
{
    double currentTime = glfwGetTime();
    double deltaTime = currentTime - lastTime;

    // Poll for and process events
    glfwPollEvents();

    // Clear the screen
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    glm::mat4 model(1.0), view(1.0), projection(1.0);

    // Create the View matrix
    view = fpsCamera.getViewMatrix();
    glm::mat4 ViewMatrix = view;

    // Create the projection matrix
    projection = glm::perspective(glm::radians(fpsCamera.getFOV()), (float)gWindowWidth / (float)gWindowHeight, 0.1f, 200.0f);

    // update the view (camera) position
    glm::vec3 viewPos;
    viewPos.x = fpsCamera.getPosition().x;
    viewPos.y = fpsCamera.getPosition().y;
    viewPos.z = fpsCamera.getPosition().z;

    // Camera world position = translation column of the inverse view matrix.
    glm::vec3 CameraPosition(glm::inverse(view)[3]);
    glm::mat4 ViewProjectionMatrix = projection * view;

    // Spawn 10 particles per millisecond, capped at a 16 ms frame's worth so
    // that one long frame does not emit a huge burst.
    int newparticles = (int)(deltaTime * 10000.0);
    if (newparticles > (int)(0.016f * 10000.0))
        newparticles = (int)(0.016f * 10000.0);

    for (int i = 0; i < newparticles; i++)
    {
        int particleIndex = FindUnusedParticle();
        ParticlesContainer[particleIndex].life = 1.0f; // This particle will live 1 second.
        ParticlesContainer[particleIndex].pos = glm::vec3(0, 0, -11.0f);

        float spread = 1.5f;
        glm::vec3 maindir = glm::vec3(0.0f, 10.0f, 0.0f);
        // Very bad way to generate a random direction;
        // See for instance instead,
        // combined with some user-controlled parameters (main direction, spread, etc)
        glm::vec3 randomdir = glm::vec3(
            (rand() % 2000 - 1000.0f) / 1000.0f,
            (rand() % 2000 - 1000.0f) / 1000.0f,
            (rand() % 2000 - 1000.0f) / 1000.0f);

        ParticlesContainer[particleIndex].speed = maindir + randomdir * spread;

        // Very bad way to generate a random color
        ParticlesContainer[particleIndex].r = rand() % 256;
        ParticlesContainer[particleIndex].g = rand() % 256;
        ParticlesContainer[particleIndex].b = rand() % 256;
        ParticlesContainer[particleIndex].a = (rand() % 256) / 3;

        ParticlesContainer[particleIndex].size = (rand() % 1000) / 2000.0f + 0.1f;
    }

    // Simulate all particles
    int ParticlesCount = 0;
    for (int i = 0; i < MaxParticles; i++)
    {
        Particle &p = ParticlesContainer[i]; // shortcut

        if (p.life > 0.0f)
        {
            // Decrease life
            p.life -= deltaTime;
            if (p.life > 0.0f)
            {
                // Simulate simple physics : gravity only, no collisions
                p.speed += glm::vec3(0.0f, -9.81f, 0.0f) * (float)deltaTime * 0.5f;
                p.pos += p.speed * (float)deltaTime;
                p.cameradistance = glm::length2(p.pos - CameraPosition);

                // Fill the GPU buffer
                g_particule_position_size_data[4 * ParticlesCount + 0] = p.pos.x;
                g_particule_position_size_data[4 * ParticlesCount + 1] = p.pos.y;
                g_particule_position_size_data[4 * ParticlesCount + 2] = p.pos.z;
                g_particule_position_size_data[4 * ParticlesCount + 3] = p.size;

                g_particule_color_data[4 * ParticlesCount + 0] = p.r;
                g_particule_color_data[4 * ParticlesCount + 1] = p.g;
                g_particule_color_data[4 * ParticlesCount + 2] = p.b;
                g_particule_color_data[4 * ParticlesCount + 3] = p.a;
            }
            else
            {
                // Particles that just died will be put at the end of the buffer in SortParticles();
                p.cameradistance = -1.0f;
            }

            // Counts every particle that was alive at the start of this frame;
            // this is the instance count passed to the draw call below.
            ParticlesCount++;
        }
    }

    // Stream this frame's particle data to the GPU.
    glBindBuffer(GL_ARRAY_BUFFER, particles_position_buffer);
    glBufferData(GL_ARRAY_BUFFER, MaxParticles * 4 * sizeof(GLfloat), NULL, GL_STREAM_DRAW); // Buffer orphaning, a common way to improve streaming perf.
    glBufferSubData(GL_ARRAY_BUFFER, 0, ParticlesCount * sizeof(GLfloat) * 4, g_particule_position_size_data);

    glBindBuffer(GL_ARRAY_BUFFER, particles_color_buffer);
    glBufferData(GL_ARRAY_BUFFER, MaxParticles * 4 * sizeof(GLubyte), NULL, GL_STREAM_DRAW); // Buffer orphaning, a common way to improve streaming perf.
    glBufferSubData(GL_ARRAY_BUFFER, 0, ParticlesCount * sizeof(GLubyte) * 4, g_particule_color_data);

    // Use our shader
    glUseProgram(programID);

    // Bind our texture in Texture Unit 0
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, Texture);
    // Set our "myTextureSampler" sampler to use Texture Unit 0
    glUniform1i(TextureID, 0);

    // Row 0 / row 1 of the view matrix are the camera's right / up axes in
    // world space; the vertex shader uses them to orient the billboards.
    glUniform3f(CameraRight_worldspace_ID, ViewMatrix[0][0], ViewMatrix[1][0], ViewMatrix[2][0]);
    glUniform3f(CameraUp_worldspace_ID, ViewMatrix[0][1], ViewMatrix[1][1], ViewMatrix[2][1]);
    glUniformMatrix4fv(ViewProjMatrixID, 1, GL_FALSE, &ViewProjectionMatrix[0][0]);

    // NOTE(review): no vertex array object is bound before the attribute calls
    // below, so they modify whichever VAO happens to be bound at this point
    // (VertexArrayID is generated during setup but never bound in the visible
    // code) — confirm this is intended.

    // 1rst attribute buffer : vertices
    glEnableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, billboard_vertex_buffer);
    glVertexAttribPointer(
        0,        // attribute. No particular reason for 0, but must match the layout in the shader.
        3,        // size
        GL_FLOAT, // type
        GL_FALSE, // normalized?
        0,        // stride
        (void *)0 // array buffer offset
    );

    // 2nd attribute buffer : positions of particles' centers
    glEnableVertexAttribArray(1);
    glBindBuffer(GL_ARRAY_BUFFER, particles_position_buffer);
    glVertexAttribPointer(
        1,        // attribute. No particular reason for 1, but must match the layout in the shader.
        4,        // size : x + y + z + size => 4
        GL_FLOAT, // type
        GL_FALSE, // normalized?
        0,        // stride
        (void *)0 // array buffer offset
    );

    // 3rd attribute buffer : particles' colors
    glEnableVertexAttribArray(2);
    glBindBuffer(GL_ARRAY_BUFFER, particles_color_buffer);
    glVertexAttribPointer(
        2,                // attribute. No particular reason for 2, but must match the layout in the shader.
        4,                // size : r + g + b + a => 4
        GL_UNSIGNED_BYTE, // type
        GL_TRUE,          // normalized? *** YES, this means that the unsigned char[4] will be accessible with a vec4 (floats) in the shader ***
        0,                // stride
        (void *)0         // array buffer offset
    );

    // These functions are specific to glDrawArrays*Instanced*.
    // The first parameter is the attribute buffer we're talking about.
    // The second parameter is the "rate at which generic vertex attributes advance when rendering multiple instances"
    glVertexAttribDivisor(0, 0); // particles vertices : always reuse the same 4 vertices -> 0
    glVertexAttribDivisor(1, 1); // positions : one per quad (its center) -> 1
    glVertexAttribDivisor(2, 1); // color : one per quad -> 1

    glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, ParticlesCount);

    // Must be called BEFORE setting uniforms because setting uniforms is done
    // on the currently active shader program.
    lightingShader.use();

    lightingShader.setUniform("model", glm::mat4(1.0)); // do not need to translate the models so just send the identity matrix
    lightingShader.setUniform("view", view);
    lightingShader.setUniform("projection", projection);
    lightingShader.setUniform("viewPos", viewPos);

    // Directional light
    lightingShader.setUniform("sunLight.direction", glm::vec3(0.0f, -0.9f, -0.17f));
    lightingShader.setUniform("sunLight.ambient", glm::vec3(0.1f, 0.1f, 0.1f));
    lightingShader.setUniform("sunLight.diffuse", glm::vec3(0.1f, 0.1f, 0.1f)); // dark
    lightingShader.setUniform("sunLight.specular", glm::vec3(0.1f, 0.1f, 0.1f));

    // Spot light
    lightingShader.setUniform("spotLight.ambient", glm::vec3(0.1f, 0.1f, 0.1f));
    lightingShader.setUniform("spotLight.diffuse", glm::vec3(0.8f, 0.8f, 0.8f));
    lightingShader.setUniform("spotLight.specular", glm::vec3(1.0f, 1.0f, 1.0f));
    lightingShader.setUniform("spotLight.position", glm::vec3(0.982347, 3.500000, 10.248156));
    lightingShader.setUniform("spotLight.direction", glm::vec3(-0.202902, -0.470038, -0.859008));
    lightingShader.setUniform("spotLight.cosInnerCone", glm::cos(glm::radians(15.0f)));
    lightingShader.setUniform("spotLight.cosOuterCone", glm::cos(glm::radians(20.0f)));
    lightingShader.setUniform("spotLight.constant", 1.0f);
    lightingShader.setUniform("spotLight.linear", 0.007f);
    lightingShader.setUniform("spotLight.exponent", 0.0017f);
    lightingShader.setUniform("spotLight.on", gFlashlightOn);

    // Render the scene
    for (int i = 0; i < 1; i++)
    {
        model = glm::translate(glm::mat4(1.0), modelPos[i]) * glm::scale(glm::mat4(1.0), modelScale[i]); // * glm::rotate(glm::mat4(1.0), glm::radians((float)(glfwGetTime() * 100.0f)), glm::vec3(1.0f, 0.0f, 0.0f));
        lightingShader.setUniform("model", model);

        // Set material properties
        lightingShader.setUniform("material.ambient", glm::vec3(0.1f, 0.1f, 0.1f));
        lightingShader.setUniformSampler("material.diffuseMap", 0);
        lightingShader.setUniform("material.specular", glm::vec3(0.8f, 0.8f, 0.8f));
        lightingShader.setUniform("material.shininess", 32.0f);

        texture[i].bind(0); // set the texture before drawing. Our simple OBJ mesh loader does not do materials yet.
        mesh[i].draw();     // Render the OBJ mesh
    }

    // Swap front and back buffers
    glfwSwapBuffers(gWindow);

    lastTime = currentTime;
}
And only the 3D scene is getting rendered like :
And when I comment the rendering of the mesh logic out, ie (This section)
// Draws each mesh with the lighting shader: per-mesh model matrix, material
// uniforms, diffuse texture on unit 0, then the draw call.
for (int i = 0; i < 1; i++)
{
    model = glm::translate(glm::mat4(1.0), modelPos[i]) * glm::scale(glm::mat4(1.0), modelScale[i]); // * glm::rotate(glm::mat4(1.0), glm::radians((float)(glfwGetTime() * 100.0f)), glm::vec3(1.0f, 0.0f, 0.0f));
    lightingShader.setUniform("model", model);

    // Set material properties
    lightingShader.setUniform("material.ambient", glm::vec3(0.1f, 0.1f, 0.1f));
    lightingShader.setUniformSampler("material.diffuseMap", 0);
    lightingShader.setUniform("material.specular", glm::vec3(0.8f, 0.8f, 0.8f));
    lightingShader.setUniform("material.shininess", 32.0f);

    texture[i].bind(0); // set the texture before drawing. Our simple OBJ mesh loader does not do materials yet.
    mesh[i].draw();     // Render the OBJ mesh
}
I get :
How would I render both of them at the same time?
My codebase: github

You simply forgot to bind the vertex array object for the particles before specifying the vertex attribute arrays for the particles:
// Sketch of the render loop: the particle VAO has to be bound before the
// particle attribute arrays are specified and before the instanced draw.
while (!glfwWindowShouldClose(gWindow))
{
    // [...]

    glBindVertexArray(VertexArrayID); // <--- this is missing
    glBindBuffer(GL_ARRAY_BUFFER, billboard_vertex_buffer);
    // [...]

    // [...]
    glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, ParticlesCount);

    // [...]
}
But note it is sufficient to specify the arrays of generic vertex attribute data once and to bind the vertex array object for drawing:
// One-time setup: with the particle VAO bound, the attribute pointers and
// divisors are recorded in the VAO and do not need to be respecified per frame.
glBindVertexArray(VertexArrayID);

// 1rst attribute buffer : vertices
glBindBuffer(GL_ARRAY_BUFFER, billboard_vertex_buffer);
// [...]

// 2nd attribute buffer : positions of particles' centers
glBindBuffer(GL_ARRAY_BUFFER, particles_position_buffer);
// [...]

// 3rd attribute buffer : particles' colors
glBindBuffer(GL_ARRAY_BUFFER, particles_color_buffer);
// [...]

glVertexAttribDivisor(0, 0); // particles vertices : always reuse the same 4 vertices -> 0
glVertexAttribDivisor(1, 1); // positions : one per quad (its center) -> 1
glVertexAttribDivisor(2, 1); // color : one per quad -> 1

while (!glfwWindowShouldClose(gWindow))
{
    // [...]

    // Per frame it is enough to bind the VAO and draw.
    glBindVertexArray(VertexArrayID);
    glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, ParticlesCount);

    // [...]

    for (int i = 0; i < 1; i++)
    {
        // [...]
    }

    // [...]
}


Is the render target view the only way to output data from pixel shader in DirectX?

I want to render an image in the screen and save it in my disk.
I have a render target view.
I have an input shader resource view with its texture (D3D11_USAGE_DYNAMIC).
I have an output shader resource view with its texture (D3D11_USAGE_DEFAULT).
I have a simple auxiliary texture (D3D11_USAGE_STAGING).
The execution path is the following:
Read input image in a texture.
Bind the input texture view and output texture view, pixel shader, sampler and vertex shader.
Run draw command.
Copy the output texture to the auxiliary texture.
Save the auxiliary texture to an image file. The image is empty.
How can I output an additional texture while still rendering to the screen?
Example code
// Creates the three textures used by the readback path:
//   - mInputTexture     (DYNAMIC): CPU-written, sampled by the pixel shader
//   - mOutputGpuTexture (DEFAULT): GPU-only, written by the shader
//   - mOutputCpuTexture (STAGING): CPU-readable copy target for CopyResource
// Returns -1 from the enclosing function as soon as any creation call fails.
mWidth = width;
mHeight = height;

// Create image texture to hold input image for unormalized values and CPU write/GPU read access
D3D11_TEXTURE2D_DESC inputImageDesc;
ZeroMemory(&inputImageDesc, sizeof(D3D11_TEXTURE2D_DESC));
inputImageDesc.ArraySize = 1;
inputImageDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
inputImageDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; // Needed for cpu write and gpu read
inputImageDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
inputImageDesc.Width = width;
inputImageDesc.Height = height;
inputImageDesc.MipLevels = 1;
inputImageDesc.SampleDesc.Count = 1;
inputImageDesc.SampleDesc.Quality = 0;
inputImageDesc.Usage = D3D11_USAGE_DYNAMIC; // Needed for cpu write and gpu read
result = engine.device()->CreateTexture2D(&inputImageDesc, nullptr, mInputTexture.GetAddressOf());
if(result < 0)
{
    return -1;
}
result = engine.device()->CreateShaderResourceView(mInputTexture.Get(), nullptr, mInputView.GetAddressOf());
if(result < 0)
{
    return -1;
}

// Create image texture for unormalized values and only GPU access
D3D11_TEXTURE2D_DESC gpuImageDesc;
ZeroMemory(&gpuImageDesc, sizeof(D3D11_TEXTURE2D_DESC));
gpuImageDesc.ArraySize = 1;
// A shader resource view can only be created on a texture that carries the
// SHADER_RESOURCE bind flag. UNORDERED_ACCESS is added so the texture can also
// back an unordered access view, which is what a shader-side RWTexture2D
// writes through.
// NOTE(review): no unordered access view is created or bound in the visible
// code — confirm how the shader's output texture is bound.
gpuImageDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
gpuImageDesc.CPUAccessFlags = 0; // Needed for gpu read/write (cpu no access)
gpuImageDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
gpuImageDesc.Width = width;
gpuImageDesc.Height = height;
gpuImageDesc.MipLevels = 1;
gpuImageDesc.SampleDesc.Count = 1;
gpuImageDesc.SampleDesc.Quality = 0;
gpuImageDesc.Usage = D3D11_USAGE_DEFAULT; // Needed for gpu read/write (cpu no access)
result = engine.device()->CreateTexture2D(&gpuImageDesc, nullptr, mOutputGpuTexture.GetAddressOf());
if(result < 0)
{
    return -1;
}
result = engine.device()->CreateShaderResourceView(mOutputGpuTexture.Get(), nullptr, mOutputView.GetAddressOf());
if(result < 0)
{
    return -1;
}

// Create image texture for unormalized values and only CPU read access.
// CopyResource requires source and destination to have the same dimensions and
// a compatible format, so the staging texture mirrors the GPU texture.
D3D11_TEXTURE2D_DESC cpuImageDesc;
ZeroMemory(&cpuImageDesc, sizeof(D3D11_TEXTURE2D_DESC));
cpuImageDesc.ArraySize = 1;
cpuImageDesc.BindFlags = 0;
cpuImageDesc.MiscFlags = 0;
cpuImageDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; // Needed for cpu read
cpuImageDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
cpuImageDesc.Width = width;
cpuImageDesc.Height = height;
cpuImageDesc.MipLevels = 1;
cpuImageDesc.SampleDesc.Count = 1;
cpuImageDesc.SampleDesc.Quality = 0;
cpuImageDesc.Usage = D3D11_USAGE_STAGING; // Needed for cpu read
result = engine.device()->CreateTexture2D(&cpuImageDesc, nullptr, mOutputCpuTexture.GetAddressOf());
if(result < 0)
{
    return -1;
}
// Interpolated vertex-shader output consumed by the pixel shader.
struct PixelInput
{
    float4 position : SV_POSITION;
    float4 color : COLOR;
    float2 coord : TEXCOORDIN;
    float2 coordOut : TEXCOORDOUT;
};

Texture2D<float4> gInputTexture : register(t0);
SamplerState gSampleType : register(s0);
// Read/write resources are unordered access views and bind to u# registers,
// not t#. In a pixel shader the UAV slots share numbering with the render
// targets, so with a single render target in slot 0 the first free slot is u1.
RWTexture2D<float4> gOutputTexture : register(u1);

// Samples the input texture to the render target and, as a side effect,
// writes solid red into the output texture at coordOut.
float4 main(PixelInput input) : SV_TARGET
{
    // RWTexture2D is indexed by integer texel coordinates.
    // NOTE(review): assumes coordOut is expressed in texels, not 0..1 — confirm.
    gOutputTexture[uint2(input.coordOut)] = float4(1.0, 0.0, 0.0, 1.0);

    float4 inputPixel = float4(0.0, 0.0, 0.0, 1.0);
    inputPixel.rgb = gInputTexture.Sample(gSampleType, input.coord).rgb;
    return inputPixel;
}
engine.context()->CopyResource(mOutputCpuTexture.Get(), mOutputGpuTexture.Get());
ZeroMemory(&mappedImgData, sizeof(D3D11_MAPPED_SUBRESOURCE));
int32_t result = engine.context()->Map(mOutputCpuTexture.Get(), 0, D3D11_MAP_READ, 0, &mappedImgData);
if(result < EC_SUCCESS)
// Copy the less bytes possible, avoiding out of bounds.
const uint32_t bytesPerRow = std::min(rowPitch, mappedImgData.RowPitch);
uint8_t* textureData = reinterpret_cast<uint8_t*>(mappedImgData.pData);
for(uint32_t i = 0; i < height; ++i)
memcpy(dst, textureData, bytesPerRow);
textureData += mappedImgData.RowPitch;
dst += rowPitch;
engine.context()->Unmap(mOutputCpuTexture.Get(), 0);
What I did to fix this was to add a compute shader as an intermediary; that way I read an RWTexture (DEFAULT) back through another texture (STAGING), and I also use another one to read the back buffer.

How to get parallel GPU pixel rendering? For voxel ray tracing

I made a voxel raycaster in Unity using a compute shader and a texture. But at 1080p, it is limited to a view distance of only 100 at 30 fps. With no light bounces yet or anything, I am quite disappointed with this performance.
I tried learning Vulkan, but the best tutorials are based on rasterization, and I guess all I really want to do is compute pixels in parallel on the GPU. I am familiar with CUDA and I've read that it is sometimes used for rendering. Or is there a simple way of just computing pixels in parallel in Vulkan? I've already got a template Vulkan project that opens a blank window. I don't need to get any data back from the GPU; I just want to render straight to the screen after giving it data.
And with the code below would it be significantly faster in Vulkan as opposed to a Unity compute shader? It has A LOT of if/else statements in it which I have read is bad for GPUs but I can't think of any other way of writing it.
EDIT: I optimized it as much as I could but it's still pretty slow, like 30 fps at 1080p.
Here is the compute shader:
#pragma kernel CSMain

RWTexture2D<float4> Result; // the actual array of pixels the player sees

const float width; // in pixels
const float height;

const StructuredBuffer<int> voxelMaterials; // for now just getting a flat voxel array
const int voxelBufferRowSize;
const int voxelBufferPlaneSize;
const int voxelBufferSize;
const StructuredBuffer<float3> rayDirections; // I'm now actually using it as points instead of directions
const float maxRayDistance;

const float3 playerCameraPosition; // relative to the voxelData, ie the first voxel's bottom, back, left corner position, no negative coordinates
const float3 playerWorldForward;
const float3 playerWorldRight;
const float3 playerWorldUp;

// Casts one ray per pixel through the voxel grid, stepping from grid face to
// grid face, and writes the colour of the first non-empty voxel (darkened with
// distance) to Result.
// NOTE(review): the thread-group size is not shown in this listing; 16x16x1
// matches the optimized kernel later in the post — confirm against the dispatch.
[numthreads(16, 16, 1)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
    Result[id.xy] = float4(0, 0, 0, 0); // setting the pixel to black by default
    float3 pointHolder = playerCameraPosition; // initializing the first point to the player's position
    const float3 p = rayDirections[id.x + (id.y * width)]; // vector transformation getting the world space directions of the rays relative to the player
    const float3 u1 = p.x * playerWorldRight;
    const float3 u2 = p.y * playerWorldUp;
    const float3 u3 = p.z * playerWorldForward;
    const float3 direction = u1 + u2 + u3; // the direction to that point

    float distanceTraveled = 0;
    int3 directionAxes; // 1 for positive, 0 for zero, -1 for negative
    int3 directionIfReplacements = { 0, 0, 0 }; // 1 for positive, 0 for zero, -1 for negative
    float3 axesUnit = { 1 / abs(direction.x), 1 / abs(direction.y), 1 / abs(direction.z) };
    float3 distancesXYZ = { 1000, 1000, 1000 };
    int face = 0; // 1 = x, 2 = y, 3 = z // the current face the while loop point is on

    // comparing the floats once in the beginning so the rest of the ray traversal can compare ints
    if (direction.x > 0) {
        directionAxes.x = 1;
        directionIfReplacements.x = 1;
    }
    else if (direction.x < 0) {
        directionAxes.x = -1;
    }
    else {
        distanceTraveled = maxRayDistance; // just ending the ray for now if one of it's direction axes is exactly 0. You'll see a line of black pixels if the player's rotation is zero but this never happens naturally
        directionAxes.x = 0;
    }
    if (direction.y > 0) {
        directionAxes.y = 1;
        directionIfReplacements.y = 1;
    }
    else if (direction.y < 0) {
        directionAxes.y = -1;
    }
    else {
        distanceTraveled = maxRayDistance;
        directionAxes.y = 0;
    }
    if (direction.z > 0) {
        directionAxes.z = 1;
        directionIfReplacements.z = 1;
    }
    else if (direction.z < 0) {
        directionAxes.z = -1;
    }
    else {
        distanceTraveled = maxRayDistance;
        directionAxes.z = 0;
    }

    // calculating the first point
    if (playerCameraPosition.x < voxelBufferRowSize &&
        playerCameraPosition.x >= 0 &&
        playerCameraPosition.y < voxelBufferRowSize &&
        playerCameraPosition.y >= 0 &&
        playerCameraPosition.z < voxelBufferRowSize &&
        playerCameraPosition.z >= 0)
    {
        int voxelIndex = floor(playerCameraPosition.x) + (floor(playerCameraPosition.z) * voxelBufferRowSize) + (floor(playerCameraPosition.y) * voxelBufferPlaneSize); // the voxel index in the flat array

        switch (voxelMaterials[voxelIndex]) {
        case 1:
            Result[id.xy] = float4(1, 0, 0, 0);
            distanceTraveled = maxRayDistance; // to end the while loop
            break;
        case 2:
            Result[id.xy] = float4(0, 1, 0, 0);
            distanceTraveled = maxRayDistance;
            break;
        case 3:
            Result[id.xy] = float4(0, 0, 1, 0);
            distanceTraveled = maxRayDistance;
            break;
        }
    }

    // traversing the ray beyond the first point
    while (distanceTraveled < maxRayDistance)
    {
        // On a grid face, the distance along that axis to the next face is a
        // whole cell (axesUnit); the other two axes are measured to their next
        // integer boundary.
        switch (face) {
        case 1:
            distancesXYZ.x = axesUnit.x;
            distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
            distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
            break;
        case 2:
            distancesXYZ.y = axesUnit.y;
            distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
            distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
            break;
        case 3:
            distancesXYZ.z = axesUnit.z;
            distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
            distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
            break;
        default:
            distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
            distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
            distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
            break;
        }

        face = 0; // 1 = x, 2 = y, 3 = z
        float smallestDistance = 1000;
        if (distancesXYZ.x < smallestDistance) {
            smallestDistance = distancesXYZ.x;
            face = 1;
        }
        if (distancesXYZ.y < smallestDistance) {
            smallestDistance = distancesXYZ.y;
            face = 2;
        }
        if (distancesXYZ.z < smallestDistance) {
            smallestDistance = distancesXYZ.z;
            face = 3;
        }
        // A zero step would never advance the ray, so stop marching.
        // NOTE(review): the body of this branch is not shown in the listing;
        // ending the loop is assumed — confirm.
        if (smallestDistance == 0) {
            break;
        }

        int3 facesIfReplacement = { 1, 1, 1 };
        switch (face) { // directionIfReplacements is positive if positive but I want to subtract so invert it to subtract 1 when negative subtract nothing when positive
        case 1:
            facesIfReplacement.x = 1 - directionIfReplacements.x;
            break;
        case 2:
            facesIfReplacement.y = 1 - directionIfReplacements.y;
            break;
        case 3:
            facesIfReplacement.z = 1 - directionIfReplacements.z;
            break;
        }

        pointHolder += direction * smallestDistance; // the acual ray marching
        distanceTraveled += smallestDistance;

        int3 voxelIndexXYZ = { -1,-1,-1 }; // the integer coordinates within the buffer
        voxelIndexXYZ.x = ceil(pointHolder.x - facesIfReplacement.x);
        voxelIndexXYZ.y = ceil(pointHolder.y - facesIfReplacement.y);
        voxelIndexXYZ.z = ceil(pointHolder.z - facesIfReplacement.z);

        //check if voxelIndexXYZ is within bounds of the voxel buffer before indexing the array
        if (voxelIndexXYZ.x < voxelBufferRowSize &&
            voxelIndexXYZ.x >= 0 &&
            voxelIndexXYZ.y < voxelBufferRowSize &&
            voxelIndexXYZ.y >= 0 &&
            voxelIndexXYZ.z < voxelBufferRowSize &&
            voxelIndexXYZ.z >= 0)
        {
            int voxelIndex = voxelIndexXYZ.x + (voxelIndexXYZ.z * voxelBufferRowSize) + (voxelIndexXYZ.y * voxelBufferPlaneSize); // the voxel index in the flat array
            switch (voxelMaterials[voxelIndex]) {
            case 1:
                Result[id.xy] = float4(1, 0, 0, 0) * (1 - (distanceTraveled / maxRayDistance));
                distanceTraveled = maxRayDistance; // to end the while loop
                break;
            case 2:
                Result[id.xy] = float4(0, 1, 0, 0) * (1 - (distanceTraveled / maxRayDistance));
                distanceTraveled = maxRayDistance;
                break;
            case 3:
                Result[id.xy] = float4(0, 0, 1, 0) * (1 - (distanceTraveled / maxRayDistance));
                distanceTraveled = maxRayDistance;
                break;
            }
        }
        else {
            break; // should be uncommented in actual game implementation where the player will always be inside the voxel buffer
        }
    }
}
Depending on the voxel data you give it it produces this:
And here is the shader after "optimizing" it and taking out all branching or diverging conditional statements (I think):
#pragma kernel CSMain

RWTexture2D<float4> Result; // the actual array of pixels the player sees
// 'static' makes this a writable per-thread variable; a plain global is a
// read-only shader constant.
static float4 resultHolder;

const float width; // in pixels
const float height;

const Buffer<int> voxelMaterials; // for now just getting a flat voxel array
const Buffer<float4> voxelColors;
const int voxelBufferRowSize;
const int voxelBufferPlaneSize;
const int voxelBufferSize;
const Buffer<float3> rayDirections; // I'm now actually using it as points instead of directions
const float maxRayDistance;

const float3 playerCameraPosition; // relative to the voxelData, ie the first voxel's bottom, back, left corner position, no negative coordinates
const float3 playerWorldForward;
const float3 playerWorldRight;
const float3 playerWorldUp;

// Casts one ray per pixel through the voxel grid using arithmetic selects in
// place of most branches, and writes the colour of the first non-empty voxel
// (darkened with distance) to Result.
[numthreads(16, 16, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    resultHolder = float4(0, 0, 0, 0); // setting the pixel to black by default
    float3 pointHolder = playerCameraPosition; // initializing the first point to the player's position
    const float3 p = rayDirections[id.x + (id.y * width)]; // vector transformation getting the world space directions of the rays relative to the player
    const float3 u1 = p.x * playerWorldRight;
    const float3 u2 = p.y * playerWorldUp;
    const float3 u3 = p.z * playerWorldForward;
    const float3 direction = u1 + u2 + u3; // the transformed ray direction in world space

    const bool anyDir0 = direction.x == 0 || direction.y == 0 || direction.z == 0; // preventing a division by zero
    // A ray with any zero component starts at maxRayDistance, so the loop below is skipped.
    float distanceTraveled = maxRayDistance * anyDir0;

    const float3 nonZeroDirection = { // to prevent a division by zero
        direction.x + (1 * anyDir0),
        direction.y + (1 * anyDir0),
        direction.z + (1 * anyDir0)
    };
    const float3 axesUnits = { // the distances if the axis is an integer
        1.0f / abs(nonZeroDirection.x),
        1.0f / abs(nonZeroDirection.y),
        1.0f / abs(nonZeroDirection.z)
    };
    const bool3 isDirectionPositiveOr0 = {
        direction.x >= 0,
        direction.y >= 0,
        direction.z >= 0
    };

    while (distanceTraveled < maxRayDistance)
    {
        const bool3 pointIsAnInteger = {
            (int)pointHolder.x == pointHolder.x,
            (int)pointHolder.y == pointHolder.y,
            (int)pointHolder.z == pointHolder.z
        };

        // Per axis: a full cell if the point already lies on a grid plane of
        // that axis, otherwise the distance to the next integer boundary.
        const float3 distancesXYZ = {
            ((floor(pointHolder.x + isDirectionPositiveOr0.x) - pointHolder.x) / direction.x * !pointIsAnInteger.x) + (axesUnits.x * pointIsAnInteger.x),
            ((floor(pointHolder.y + isDirectionPositiveOr0.y) - pointHolder.y) / direction.y * !pointIsAnInteger.y) + (axesUnits.y * pointIsAnInteger.y),
            ((floor(pointHolder.z + isDirectionPositiveOr0.z) - pointHolder.z) / direction.z * !pointIsAnInteger.z) + (axesUnits.z * pointIsAnInteger.z)
        };

        float smallestDistance = min(distancesXYZ.x, distancesXYZ.y);
        smallestDistance = min(smallestDistance, distancesXYZ.z);

        pointHolder += direction * smallestDistance;
        distanceTraveled += smallestDistance;

        const int3 voxelIndexXYZ = {
            floor(pointHolder.x) - (!isDirectionPositiveOr0.x && (int)pointHolder.x == pointHolder.x),
            floor(pointHolder.y) - (!isDirectionPositiveOr0.y && (int)pointHolder.y == pointHolder.y),
            floor(pointHolder.z) - (!isDirectionPositiveOr0.z && (int)pointHolder.z == pointHolder.z)
        };

        const bool inBounds = (voxelIndexXYZ.x < voxelBufferRowSize && voxelIndexXYZ.x >= 0) && (voxelIndexXYZ.y < voxelBufferRowSize && voxelIndexXYZ.y >= 0) && (voxelIndexXYZ.z < voxelBufferRowSize && voxelIndexXYZ.z >= 0);

        const int voxelIndexFlat = (voxelIndexXYZ.x + (voxelIndexXYZ.z * voxelBufferRowSize) + (voxelIndexXYZ.y * voxelBufferPlaneSize)) * inBounds; // meaning the voxel on 0,0,0 will always be empty and act as a our index out of range prevention

        if (voxelMaterials[voxelIndexFlat] > 0) {
            resultHolder = voxelColors[voxelMaterials[voxelIndexFlat]] * (1 - (distanceTraveled / maxRayDistance));
            break; // first hit wins; without this a farther voxel would overwrite the colour
        }
        if (!inBounds) break;
    }

    Result[id.xy] = resultHolder;
}
A compute shader is what it is: a program that runs on the GPU, be it in Vulkan or in Unity, so you are doing it in parallel either way. The point of Vulkan, however, is that it gives you more control over the commands being executed on the GPU - synchronization, memory, etc. So it's not necessarily going to be faster in Vulkan than in Unity. What you should do is actually optimise your shaders.
Also, the main problem with if/else is divergence within groups of invocations which operate in lock-step. So, if you can avoid it, the performance impact will be far lessened. These may help you with that.
If you still want to do all that in vulkan...
Since you are not going to do any triangle rasterisation, you probably won't need the render passes or graphics pipelines that the tutorials generally show. Instead you are going to need a compute pipeline. Those are far simpler than graphics pipelines, requiring only one shader and the pipeline layout (the inputs and outputs are bound via descriptor sets).
You just need to pass the swapchain image to the compute shader as a storage image in a descriptor (and of course any other data your shader may need, all are passed via descriptors). For that you need to specify VK_IMAGE_USAGE_STORAGE_BIT in your swapchain creation structure.
Then, in your command buffer you bind the descriptor sets with image and other data, bind the compute pipeline, and dispatch it as you probably do in Unity. The swapchain presentation and submitting the command buffers shouldn't be different than how the graphics works in the tutorials.

In vulkan: I want save a depth image to file, but always got a error depth image

I want to save the depth image that results from rendering to the framebuffer.
1. I create a staging buffer to hold the image data.
2. I use vkCmdCopyImageToBuffer to copy the depth image to the staging buffer.
3. I use vkMapMemory to map the staging buffer's memory into host memory.
4. I read the host memory and write the depth data to a file.
But I always get an incorrect depth image, and I don't know what is wrong.
application window output.
bug depth image file.
(source file)
save depth image function:
// Copies the depth aspect of the depth image into a host-visible buffer and
// dumps it to `path` as a binary PPM (P6).
VkDeviceSize size = WIDTH * HEIGHT * 4; // one 32-bit depth value per pixel
VkBuffer dstBuffer;
VkDeviceMemory dstMemory;

// The destination buffer has to exist and have memory bound before it can be
// copied into or mapped.
// NOTE(review): createBuffer, endSingleTimeCommands and depthImage are not
// visible in this excerpt; they are assumed to be the project's helpers and
// depth attachment (beginSingleTimeCommands is visible) — confirm the names.
createBuffer(size,
             VK_BUFFER_USAGE_TRANSFER_DST_BIT,
             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
             dstBuffer, dstMemory);

VkCommandBuffer copyCmd = beginSingleTimeCommands();

// depth format -> VK_FORMAT_D32_SFLOAT_S8_UINT
VkBufferImageCopy region = {};
region.bufferOffset = 0;
region.bufferImageHeight = 0; // 0 => tightly packed
region.bufferRowLength = 0;   // 0 => tightly packed
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; // depth only, no stencil
region.imageSubresource.mipLevel = 0;
region.imageSubresource.baseArrayLayer = 0;
region.imageSubresource.layerCount = 1;
region.imageOffset = VkOffset3D{ 0, 0, 0 };
region.imageExtent = VkExtent3D{ swapChainExtent.width, swapChainExtent.height, 1};

// NOTE(review): the image must already be in TRANSFER_SRC_OPTIMAL layout when
// this copy executes — confirm a layout transition happens before this point.
vkCmdCopyImageToBuffer(copyCmd, depthImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dstBuffer, 1, &region);

// Submit and wait, so the data is in the buffer before it is mapped.
endSingleTimeCommands(copyCmd);

// Map image memory so we can start copying from it
void *data;
vkMapMemory(device, dstMemory, 0, size, 0, &data);

std::ofstream file(path, std::ios::out | std::ios::binary);

// ppm header
file << "P6\n" << WIDTH << "\n" << HEIGHT << "\n" << 255 << "\n";

float *row = (float*)data;
auto size_v = WIDTH * HEIGHT;
for (uint32_t y = 0; y < size_v; y++) {
    // NOTE(review): this writes one raw byte of the float's bit pattern three
    // times, not a depth value converted to the 0-255 range.
    file.write((char*)row + 1, 1);
    file.write((char*)row + 1, 1);
    file.write((char*)row + 1, 1);
    row++; // advance to the next pixel's depth value
}
file.close();

// Clean up resources
vkUnmapMemory(device, dstMemory);
vkFreeMemory(device, dstMemory, nullptr);
vkDestroyBuffer(device, dstBuffer, nullptr);
I hope someone can help me out. Thanks!
Assuming you've done all the transfer work correctly, your mapped data is basically an array of floats. This is reflected in your code by this line:
float *row = (float*)data;
However, when you actually write out the file you're treating the data like bytes...
file.write((char*)row + 1, 1);
So you're writing out only 8 bits (a single raw byte) of a 32-bit float, and the same byte three times. What you need is some function to convert from the float to a color value.
Assuming the depth value is normalized (I can't remember off the top of my head whether this is the case, or if it's dependent on the pipeline or framebuffer setup) and if you just want greyscale, you could use
// Converts a normalized value to an 8-bit channel value.
//
// @param f  Value expected to lie in [0, 1].
// @return   f scaled to 0..255 (truncated towards zero). Inputs at or below 0
//           (and NaN) yield 0; inputs at or above 1 yield 255, so the
//           float-to-integer conversion can never go out of range.
uint8_t map(float f) {
    // Written as a negated comparison so that NaN also takes this branch.
    if (!(f > 0.0f)) {
        return 0;
    }
    if (f >= 1.0f) {
        return 255;
    }
    return static_cast<uint8_t>(f * 255.0f);
}
and inside your file writing loop you'd do something like
// Convert the current depth sample to a single grey byte and emit it for the
// red, green and blue channels of the PPM pixel.
uint8_t grey = map(*row);
// std::ostream::write takes a const char*, and a uint8_t* does not convert to
// it implicitly, so the address is cast explicitly.
file.write(reinterpret_cast<const char*>(&grey), 1);
file.write(reinterpret_cast<const char*>(&grey), 1);
file.write(reinterpret_cast<const char*>(&grey), 1);
Alternatively, if you want some sort of color gradient for easier visualization, you'd want a more complex mapping function...
// Maps a normalized hue onto a colour wheel.
//
// The input is scaled to six unit-wide segments, and within each segment one
// channel ramps linearly while the other two stay fixed:
//   [0,1) red -> yellow, [1,2) yellow -> green, [2,3) green -> cyan,
//   [3,4) cyan -> blue,  [4,5) blue -> magenta, [5,6) magenta -> red.
//
// @param normalizedHue  Hue, expected in [0, 1].
// @return               RGB colour with components in [0, 1]. Values below 0,
//                       values at or above 1, and NaN all yield pure red.
vec3 colorWheel(float normalizedHue) {
    const float v = normalizedHue * 6.f;
    if (v < 0.f) {
        return vec3(1.f, 0.f, 0.f);
    } else if (v < 1.f) {
        return vec3(1.f, v, 0.f);
    } else if (v < 2.f) {
        return vec3(1.f - (v - 1.f), 1.f, 0.f);
    } else if (v < 3.f) {
        return vec3(0.f, 1.f, (v - 2.f));
    } else if (v < 4.f) {
        return vec3(0.f, 1.f - (v - 3.f), 1.f);
    } else if (v < 5.f) {
        return vec3((v - 4.f), 0.f, 1.f);
    } else if (v < 6.f) {
        return vec3(1.f, 0.f, 1.f - (v - 5.f));
    } else {
        // Also reached for NaN, since every comparison above is then false.
        return vec3(1.f, 0.f, 0.f);
    }
}
and in your file output loop...
// Turn the current depth sample into an RGB colour and quantize each channel
// to one byte.
vec3 color = colorWheel(*row);
uint8_t r = map(color.r);
uint8_t g = map(color.g);
uint8_t b = map(color.b);
// std::ostream::write takes a const char*, and a uint8_t* does not convert to
// it implicitly, so each address is cast explicitly.
file.write(reinterpret_cast<const char*>(&r), 1);
file.write(reinterpret_cast<const char*>(&g), 1);
file.write(reinterpret_cast<const char*>(&b), 1);

How to color individual pixels with OpenGL ES 2.0?

Is it possible to change the color of an individual pixel with OpenGL ES 2.0? So far, I have found that I can do it by drawing a vertex. I've used this call to draw it:
GLES20.glDrawArrays(GLES20.GL_POINTS, 0, 1);
The point size was set to the minimum so that only a single pixel is painted.
Everything worked until I needed to draw 3 to 4 million of them! It takes 5-6 seconds to set up just one frame. That is far too slow, because the pixels will be updated constantly; the refresh rate should preferably be as close as possible to 60 fps.
How can I paint them more efficiently?
Note: they must be painted individually!
My attempt is here (for a screen of 1440x2560 px):
package com.example.ctelescu.opengl_pixel_draw;
import android.opengl.GLES20;
import android.opengl.GLSurfaceView;
import android.opengl.Matrix;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.opengles.GL10;
public class PixelDrawRenderer implements GLSurfaceView.Renderer {
private float[] mModelMatrix = new float[16];
private float[] mViewMatrix = new float[16];
private float[] mProjectionMatrix = new float[16];
private float[] mMVPMatrix = new float[16];
private final FloatBuffer mVerticesBuffer;
private int mMVPMatrixHandle;
private int mPositionHandle;
private int mColorHandle;
private final int mBytesPerFloat = 4;
private final int mStrideBytes = 7 * mBytesPerFloat;
private final int mPositionOffset = 0;
private final int mPositionDataSize = 3;
private final int mColorOffset = 3;
private final int mColorDataSize = 4;
/**
 * Allocates the direct buffer that backs the vertex data.
 *
 * The commented-out array below is a small hand-written sample of the vertex
 * layout (three position floats followed by four colour floats); it is not used.
 */
public PixelDrawRenderer() {
// Define the vertices.
// final float[] vertices = {
// // X, Y, Z,
// // R, G, B, A
// -1f, 1f, 0.0f,
// 1.0f, 0.0f, 0.0f, 1.0f,
// -0.9f, 1.2f, 0.0f,
// 0.0f, 0.0f, 1.0f, 1.0f,
// -0.88f, 1.2f, 0.0f,
// 0.0f, 1.0f, 0.0f, 1.0f,
// -0.87f, 1.2f, 0.0f,
// 0.0f, 1.0f, 0.0f, 1.0f,
// -0.86f, 1.2f, 0.0f,
// 0.0f, 1.0f, 0.0f, 1.0f,
// -0.85f, 1.2f, 0.0f,
// 0.0f, 1.0f, 0.0f, 1.0f};
// Initialize the buffers.
// 22579200 floats = 3225600 vertices (the count passed to glDrawArrays) * 7 floats per vertex.
// NOTE(review): the rest of this statement (whatever converts the ByteBuffer into the
// FloatBuffer field, and the terminating ';') is not visible in this excerpt.
mVerticesBuffer = ByteBuffer.allocateDirect(22579200 * mBytesPerFloat)
// mVerticesBuffer.put(vertices);
/**
 * Sets the clear colour and the view matrix, then builds the shader program:
 * creates the vertex and fragment shaders, attaches them to a program, binds the
 * attribute locations and looks up the uniform/attribute handles used when drawing.
 *
 * Throws RuntimeException when a shader or program handle ends up as 0.
 *
 * NOTE(review): several statements are not visible in this excerpt (the compile, link
 * and use-program calls announced by the comments, plus the closing braces of the
 * nested blocks).
 */
public void onSurfaceCreated(GL10 glUnused, EGLConfig config) {
// Set the background clear color to gray.
GLES20.glClearColor(0.5f, 0.5f, 0.5f, 0.5f);
// Place the eye on the +Z axis, 1.5 units from the origin.
final float eyeX = 0.0f;
final float eyeY = 0.0f;
final float eyeZ = 1.5f;
// We are looking toward the distance (down the -Z axis).
final float lookX = 0.0f;
final float lookY = 0.0f;
final float lookZ = -5.0f;
// Set our up vector. This is where our head would be pointing were we holding the camera.
final float upX = 0.0f;
final float upY = 1.0f;
final float upZ = 0.0f;
// Set the view matrix. This matrix can be said to represent the camera position.
// NOTE: In OpenGL 1, a ModelView matrix is used, which is a combination of a model and
// view matrix. In OpenGL 2, we can keep track of these matrices separately if we choose.
Matrix.setLookAtM(mViewMatrix, 0, eyeX, eyeY, eyeZ, lookX, lookY, lookZ, upX, upY, upZ);
final String vertexShader =
"uniform mat4 u_MVPMatrix; \n" // A constant representing the combined model/view/projection matrix.
+ "attribute vec4 a_Position; \n" // Per-vertex position information we will pass in.
+ "attribute vec4 a_Color; \n" // Per-vertex color information we will pass in.
+ "varying vec4 v_Color; \n" // This will be passed into the fragment shader.
+ "void main() \n" // The entry point for our vertex shader.
+ "{ \n"
+ " v_Color = a_Color; \n" // Pass the color through to the fragment shader.
// It will be interpolated across the primitive.
+ " gl_Position = u_MVPMatrix \n" // gl_Position is a special variable used to store the final position.
+ " * a_Position; \n" // Multiply the vertex by the matrix to get the final point in
// normalized screen coordinates.
+ " gl_PointSize = 0.1; \n" // Requested size of each rendered point.
+ "} \n";
// NOTE(review): as shown, the fragment shader string starts with a '+' and contains an
// "#else"/"#endif" pair with no opening "#if..." line; the opening line appears to be
// missing from this excerpt.
final String fragmentShader =
+ "precision highp float; \n"
+ "#else \n"
+ "precision mediump float; \n" // Set the default precision to medium. We don't need as high of a
// precision in the fragment shader.
+ "#endif \n"
+ "varying vec4 v_Color; \n" // This is the color from the vertex shader interpolated across the
// primitive per fragment.
+ "void main() \n" // The entry point for our fragment shader.
+ "{ \n"
+ " gl_FragColor = v_Color; \n" // Pass the color directly through the pipeline.
+ "} \n";
// Load in the vertex shader.
int vertexShaderHandle = GLES20.glCreateShader(GLES20.GL_VERTEX_SHADER);
if (vertexShaderHandle != 0) {
// Pass in the shader source.
GLES20.glShaderSource(vertexShaderHandle, vertexShader);
// Compile the shader. (NOTE(review): the compile call itself is not visible here.)
// Get the compilation status.
final int[] compileStatus = new int[1];
GLES20.glGetShaderiv(vertexShaderHandle, GLES20.GL_COMPILE_STATUS, compileStatus, 0);
// If the compilation failed, drop the handle so the check below throws.
// (NOTE(review): no delete call is visible here.)
if (compileStatus[0] == 0) {
vertexShaderHandle = 0;
if (vertexShaderHandle == 0) {
throw new RuntimeException("Error creating vertex shader.");
// Load in the fragment shader.
int fragmentShaderHandle = GLES20.glCreateShader(GLES20.GL_FRAGMENT_SHADER);
if (fragmentShaderHandle != 0) {
// Pass in the shader source.
GLES20.glShaderSource(fragmentShaderHandle, fragmentShader);
// Compile the shader. (NOTE(review): the compile call itself is not visible here.)
// Get the compilation status.
final int[] compileStatus = new int[1];
GLES20.glGetShaderiv(fragmentShaderHandle, GLES20.GL_COMPILE_STATUS, compileStatus, 0);
// If the compilation failed, drop the handle so the check below throws.
if (compileStatus[0] == 0) {
fragmentShaderHandle = 0;
if (fragmentShaderHandle == 0) {
throw new RuntimeException("Error creating fragment shader.");
// Create a program object and store the handle to it.
int programHandle = GLES20.glCreateProgram();
if (programHandle != 0) {
// Bind the vertex shader to the program.
GLES20.glAttachShader(programHandle, vertexShaderHandle);
// Bind the fragment shader to the program.
GLES20.glAttachShader(programHandle, fragmentShaderHandle);
// Bind attributes: a_Position to location 0 and a_Color to location 1.
GLES20.glBindAttribLocation(programHandle, 0, "a_Position");
GLES20.glBindAttribLocation(programHandle, 1, "a_Color");
// Link the two shaders together into a program. (NOTE(review): the link call is not visible here.)
// Get the link status.
final int[] linkStatus = new int[1];
GLES20.glGetProgramiv(programHandle, GLES20.GL_LINK_STATUS, linkStatus, 0);
// If the link failed, drop the handle so the check below throws.
if (linkStatus[0] == 0) {
programHandle = 0;
if (programHandle == 0) {
throw new RuntimeException("Error creating program.");
// Set program handles. These will later be used to pass in values to the program.
mMVPMatrixHandle = GLES20.glGetUniformLocation(programHandle, "u_MVPMatrix");
mPositionHandle = GLES20.glGetAttribLocation(programHandle, "a_Position");
mColorHandle = GLES20.glGetAttribLocation(programHandle, "a_Color");
// Tell OpenGL to use this program when rendering. (NOTE(review): the call is not visible here.)
/**
 * Resizes the viewport, rebuilds the projection matrix and fills a vertex array with
 * one yellow (R=1, G=1, B=0, A=1) point per (i, j) grid position.
 *
 * @param glUnused unused
 * @param width    surface width in pixels
 * @param height   surface height in pixels
 *
 * NOTE(review): the closing braces of the loops and whatever copies `vertices` into the
 * vertex buffer are not visible in this excerpt.
 */
public void onSurfaceChanged(GL10 glUnused, int width, int height) {
// Set the OpenGL viewport to the same size as the surface.
GLES20.glViewport(0, 0, width, height);
// Create a new perspective projection matrix. The height will stay the same
// while the width will vary as per aspect ratio.
final float ratio = (float) width / height;
final float left = -ratio;
final float right = ratio;
final float bottom = -1.0f;
final float top = 1.0f;
final float near = 1.0f;
final float far = 10.0f;
Matrix.frustumM(mProjectionMatrix, 0, left, right, bottom, top, near, far);
// Room for 22579200 / 7 = 3225600 vertices. The loops below emit one vertex per
// (i, j) pair, so a surface with more grid positions than that would overrun the array.
float[] vertices = new float[22579200];
int counter = 0;
// -width / 2 and height / 2 are integer divisions whose results are then widened to float.
for (float i = -width / 2; i < width / 2; i++) {
for (float j = height / 2; j > -height / 2; j--) {
// Seven floats per vertex: X, Y, Z followed by R, G, B, A.
vertices[counter++] = 2f * i * (1f / width); //X
vertices[counter++] = 2f * j * (1.5f / height); //Y
vertices[counter++] = 0; //Z
vertices[counter++] = 1f; //red
vertices[counter++] = 1f; //green
vertices[counter++] = 0f; //blue
vertices[counter++] = 1f; //alpha
/**
 * Per-frame callback; resets the model matrix to identity.
 *
 * NOTE(review): the rest of the body (presumably the clear and the call to
 * drawVertices) is not visible in this excerpt.
 */
public void onDrawFrame(GL10 glUnused) {
// Draw the vertices facing straight on.
Matrix.setIdentityM(mModelMatrix, 0);
/**
 * Points the position and colour attributes at the given buffer, uploads the combined
 * model-view-projection matrix and draws 3225600 vertices as GL_POINTS.
 *
 * @param aVertexBuffer interleaved vertex data with a stride of mStrideBytes per vertex
 *
 * NOTE(review): both glVertexAttribPointer calls receive the same buffer, and no
 * repositioning of it (e.g. to mPositionOffset / mColorOffset) or enabling of the
 * attribute arrays is visible in this excerpt - confirm against the full source.
 */
private void drawVertices(final FloatBuffer aVertexBuffer) {
// Pass in the position information
GLES20.glVertexAttribPointer(mPositionHandle, mPositionDataSize, GLES20.GL_FLOAT, false,
mStrideBytes, aVertexBuffer);
// Pass in the color information
GLES20.glVertexAttribPointer(mColorHandle, mColorDataSize, GLES20.GL_FLOAT, false,
mStrideBytes, aVertexBuffer);
// This multiplies the view matrix by the model matrix, and stores the result in the MVP matrix
// (which currently contains model * view).
Matrix.multiplyMM(mMVPMatrix, 0, mViewMatrix, 0, mModelMatrix, 0);
// This multiplies the modelview matrix by the projection matrix, and stores the result in the MVP matrix
// (which now contains model * view * projection).
Matrix.multiplyMM(mMVPMatrix, 0, mProjectionMatrix, 0, mMVPMatrix, 0);
GLES20.glUniformMatrix4fv(mMVPMatrixHandle, 1, false, mMVPMatrix, 0);
// The count is hard-coded: 3225600 points per draw call.
GLES20.glDrawArrays(GLES20.GL_POINTS, 0, 3225600);

CG Shader Semantics with OpenGL 3.x?

I used to have CG shaders working with vertex arrays in OpenGL 2.x, but I've updated to use VBOs and VAOs in OpenGL 3.x and now the semantics don't seem to be working, except for POSITION. CG doesn't throw up any compile errors, but if I set my output color in my fragment shader to my input normal value, I just get solid black. There's another answer that links to a page saying to use cgGLEnableClientState (which did nothing by itself) and cgGLSetParameterPointer (which seems crazy since I'm already sending the data to OpenGL, why send another copy through CG). So what am I missing?
Vertex Shader:
// Vertex shader: transforms the vertex position and forwards the texture
// coordinate, light direction, view vector and normal to the fragment stage.
// NOTE(review): the braces around the structs and the function body are not
// visible in this excerpt.

// Vertex-stage inputs: five uniforms followed by the per-vertex attributes,
// which are bound through the POSITION / NORMAL / TEXCOORD semantics.
struct input
in uniform float4x4 worldViewProjMatrix;
in uniform float4x4 invTransWorldMatrix;
in uniform float4x4 worldMatrix;
in uniform float3 lightDir;
in uniform float3 eyePosition;
in varying float4 position : POSITION;
in varying float4 normal : NORMAL;
in varying float2 texCoord : TEXCOORD;
// Vertex-stage outputs: clip-space position plus four interpolants carried in
// TEXCOORD0..TEXCOORD3.
struct output
out varying float4 position : POSITION;
out varying float2 texCoord : TEXCOORD0;
out varying float3 light : TEXCOORD1;
out varying float3 normal : TEXCOORD2;
out varying float3 view : TEXCOORD3;
output main(input IN)
output OUT = output(0);
OUT.position = mul(IN.worldViewProjMatrix, IN.position);
OUT.texCoord = IN.texCoord;
// The light direction uniform is passed through unchanged.
OUT.light = IN.lightDir;
// NOTE(review): the transformed position is normalized before it is subtracted
// from the eye position - confirm this is intended.
float3 worldPosition = normalize(mul(IN.worldMatrix, IN.position)).xyz;
OUT.view = IN.eyePosition - worldPosition;
// The normal is transformed by the invTransWorldMatrix uniform and normalized.
OUT.normal = normalize(mul(IN.invTransWorldMatrix, IN.normal)).xyz;
return OUT;
Fragment Shader:
// Fragment shader: computes diffuse and specular terms from the interpolated
// normal, light and view vectors, but currently outputs the interpolated
// normal as the colour (the lit result is commented out).
// NOTE(review): the closing brace of the struct and the braces of the
// function body are not visible in this excerpt.

// Fragment-stage inputs: the four interpolants written by the vertex stage
// (TEXCOORD0..TEXCOORD3) followed by the material uniforms and the sampler.
struct input {
in varying float2 texCoord : TEXCOORD0;
in varying float3 light : TEXCOORD1;
in varying float3 normal : TEXCOORD2;
in varying float3 view : TEXCOORD3;
in uniform float3 diffuse;
in uniform float3 ambient;
in uniform float3 specular;
in uniform float shininess;
in uniform sampler2D colorMapSampler;
float4 main(input IN) : COLOR
float4 color = tex2D(IN.colorMapSampler, IN.texCoord);
// Re-normalize the interpolated vectors before using them.
float3 normal = normalize(IN.normal);
float3 lightDir = normalize(IN.light);
float3 viewDir = normalize(IN.view);
// Diffuse factor: clamped dot product of normal and light direction.
float3 diff = saturate(dot(normal, lightDir));
// Reflection vector built from the diffuse factor, then the specular factor
// raised to the material's shininess.
float3 reflect = normalize(2 * diff * normal - lightDir);
float3 specular = pow(saturate(dot(reflect, viewDir)), IN.shininess);
float4 result;
//result = float4(color.rgb * (IN.ambient + IN.diffuse * diff) + IN.specular * specular, 1.0f);
// Debug output: show the raw interpolated normal (IN.normal, not the
// normalized local) as the fragment colour.
result = float4(IN.normal, 1.0f);
return result;
I found someplace that listed these as the indices for glVertexAttribPointer, but they could easily be wrong (these are the Shader::POSITION, Shader::NORMAL, etc in the VBO setup function):
// Indices used as the first argument of glVertexAttribPointer (referenced as
// Shader::POSITION, Shader::NORMAL, ... in the VBO setup).
// NOTE(review): only some enumerators (and no braces) are visible in this
// excerpt; POSITION, NORMAL and TEXCOORD0 are used elsewhere but not shown.
enum GenericVertexInputIndices
// DIFFUSE and COLOR0 are two names for the same index.
DIFFUSE = 3, COLOR0 = 3,
PSIZE = 6,
VBO setup function below:
// Creates this mesh's VAO and vertex buffer, uploads its slice of index data
// into the bound element array buffer, and describes the vertex layout.
//
// _indexVBO     Not referenced in the visible lines.
// _indexOffset  In/out: element offset at which this mesh's indices are
//               written; advanced by m_indices.size() for the next mesh.
//
// NOTE(review): binding of the VAO and of the element array buffer, and any
// glEnableVertexAttribArray calls, are not visible in this excerpt.
void MeshObject::initVBO(const unsigned int&_indexVBO, unsigned int& _indexOffset)
glGenVertexArrays(1, &m_vao);
//sub in this section of the index data
m_indexOffset = _indexOffset;
_indexOffset = _indexOffset + m_indices.size();
// Offset and size are in bytes; indices are stored as unsigned short.
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, m_indexOffset * sizeof(unsigned short), m_indices.size() * sizeof(unsigned short), &(m_indices[0]));
//init vertex data
glGenBuffers(1, &m_vertexVBO);
glBindBuffer(GL_ARRAY_BUFFER, m_vertexVBO);
glBufferData(GL_ARRAY_BUFFER, m_data.size() * sizeof(VertexData), &(m_data[0]), GL_STATIC_DRAW);
// Interleaved layout with a stride of sizeof(VertexData): position at byte 0,
// normal at byte 12, texture coordinate at byte 24.
// presumably VertexData is laid out as 3 + 3 + 2 floats - verify against its definition.
glVertexAttribPointer(Shader::POSITION, 3, GL_FLOAT, GL_FALSE, sizeof(VertexData), (char*)0);
glVertexAttribPointer(Shader::NORMAL, 3, GL_FLOAT, GL_FALSE, sizeof(VertexData), (char*)12);
glVertexAttribPointer(Shader::TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(VertexData), (char*)24);
Shader bind function below:
// Binds this Cg program and uploads its uniform parameters.
//
// _worldTransform  World matrix of the object being drawn; used to derive the
//                  world-view-projection and inverse-transpose matrices and
//                  the light direction.
// _info            Material colours, shininess and texture list.
//
// NOTE(review): braces, the calls that actually bind/enable the program, and
// the label of the non-vertex case are not visible in this excerpt.
void Shader::bind(const matrix4 &_worldTransform, const Material::MaterialInfo &_info)
CGerror error;
//bind to the shader
CGprofile profile = renderGlobals.shaderMgr.getProfile(static_cast<Shader::ShaderType>(m_shaderType));
// The error code is fetched but not inspected in the visible lines.
error = cgGetError();
error = cgGetError();
switch (m_shaderType)
case VERTEX:
//get vertex parameters
// Parameters are looked up by their name inside the shader's "IN" struct.
CGparameter worldMatrix = cgGetNamedParameter(m_program, "IN.worldMatrix");
CGparameter worldViewProjMatrix = cgGetNamedParameter(m_program, "IN.worldViewProjMatrix");
CGparameter invTransWorldMatrix = cgGetNamedParameter(m_program, "IN.invTransWorldMatrix");
CGparameter light = cgGetNamedParameter(m_program, "IN.lightDir");
CGparameter eyePosition = cgGetNamedParameter(m_program, "IN.eyePosition");
error = cgGetError();
//set vertex parameters
matrix4 worldViewProj = *(renderGlobals.debugCamera.getViewProjectionMatrix()) * _worldTransform;
// Unlike the parameters below, worldViewProjMatrix is set without a NULL check.
cgGLSetMatrixParameterfc(worldViewProjMatrix, worldViewProj.m16);
matrix4 invTransWorld = _worldTransform.getInverse().getTranspose();
if (invTransWorldMatrix != NULL)
cgGLSetMatrixParameterfc(invTransWorldMatrix, invTransWorld.m16);
if (worldMatrix != NULL)
cgGLSetMatrixParameterfc(worldMatrix, _worldTransform.m16);
// The light is placed at the debug camera's position; the direction sent to
// the shader is that position minus the transform's wAxis.
vector3 lightPos = *renderGlobals.debugCamera.getPosition();
//vector3 lightPos = vector3(0.0f, 0.0f, 0.0f);
vector3 lightDir = lightPos - _worldTransform.wAxis;
if (light != NULL)
cgGLSetParameter3fv(light, lightDir.v);
if (eyePosition != NULL)
cgGLSetParameter3fv(eyePosition, renderGlobals.debugCamera.getPosition()->v);
error = cgGetError();
//set up material info
// NOTE(review): presumably this is the fragment-shader case; its case label
// is not visible here.
CGparameter diffuse = cgGetNamedParameter(m_program, "IN.diffuse");
CGparameter ambient = cgGetNamedParameter(m_program, "IN.ambient");
CGparameter specular = cgGetNamedParameter(m_program, "IN.specular");
CGparameter shininess = cgGetNamedParameter(m_program, "IN.shininess");
if (diffuse != NULL)
cgGLSetParameter3fv(diffuse, _info.diffuse.rgb);
if (ambient != NULL)
cgGLSetParameter3fv(ambient, _info.ambient.rgb);
if (specular != NULL)
cgGLSetParameter3fv(specular, _info.specular.rgb);
if (shininess != NULL)
cgGLSetParameter1f(shininess, _info.shininess);
//set up textures
// Only the first texture of the first texture slot is bound, and only when
// that slot is non-empty.
CGparameter colorMapSampler = cgGetNamedParameter(m_program, "IN.colorMapSampler");
if (colorMapSampler != NULL)
if (_info.textureInfo[0].size() > 0)
Index<Texture> texture = _info.textureInfo[0][0].texture;
cgGLSetTextureParameter(colorMapSampler, texture->getID());
} else {
//ERROR: trying to bind a shader with an unknown type
Changed the semantics in the vertex input structure to use ATTR* matching what I have in the GenericVertexInputIndices and voila, it works. I had tried changing all my semantics to ATTR* in both the vertex and fragment shader before and gotten a bunch of domain conflict errors, but didn't notice that it didn't complain about the ones in the vertex input structure. Apparently they're only for vertex input. Another small detail that totally screws everything up.