How to get parallel GPU pixel rendering for voxel ray tracing in Vulkan?

I made a voxel raycaster in Unity using a compute shader and a texture. But at 1080p, it is limited to a view distance of only 100 at 30 fps. With no light bounces yet or anything, I am quite disappointed with this performance.
I tried learning Vulkan, but the best tutorials are based on rasterization, and I guess all I really want to do is compute pixels in parallel on the GPU. I am familiar with CUDA, and I've read that it is sometimes used for rendering. Or is there a simple way of just computing pixels in parallel in Vulkan? I've already got a template Vulkan project that opens a blank window. I don't need to get any data back from the GPU, just render straight to the screen after giving it data.
And with the code below, would it be significantly faster in Vulkan as opposed to a Unity compute shader? It has a lot of if/else statements in it, which I have read is bad for GPUs, but I can't think of any other way of writing it.
EDIT: I optimized it as much as I could, but it's still pretty slow: about 30 fps at 1080p.
Here is the compute shader:
#pragma kernel CSMain
RWTexture2D<float4> Result; // the actual array of pixels the player sees
const float width; // in pixels
const float height;
const StructuredBuffer<int> voxelMaterials; // for now just getting a flat voxel array
const int voxelBufferRowSize;
const int voxelBufferPlaneSize;
const int voxelBufferSize;
const StructuredBuffer<float3> rayDirections; // I'm now actually using it as points instead of directions
const float maxRayDistance;
const float3 playerCameraPosition; // relative to the voxelData, ie the first voxel's bottom, back, left corner position, no negative coordinates
const float3 playerWorldForward;
const float3 playerWorldRight;
const float3 playerWorldUp;
[numthreads(8,8,1)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
Result[id.xy] = float4(0, 0, 0, 0); // setting the pixel to black by default
float3 pointHolder = playerCameraPosition; // initializing the first point to the player's position
const float3 p = rayDirections[id.x + (id.y * width)]; // vector transformation getting the world space directions of the rays relative to the player
const float3 u1 = p.x * playerWorldRight;
const float3 u2 = p.y * playerWorldUp;
const float3 u3 = p.z * playerWorldForward;
const float3 direction = u1 + u2 + u3; // the direction to that point
float distanceTraveled = 0;
int3 directionAxes; // 1 for positive, 0 for zero, -1 for negative
int3 directionIfReplacements = { 0, 0, 0 }; // 1 for a positive direction axis, 0 otherwise
float3 axesUnit = { 1 / abs(direction.x), 1 / abs(direction.y), 1 / abs(direction.z) };
float3 distancesXYZ = { 1000, 1000, 1000 };
int face = 0; // 1 = x, 2 = y, 3 = z // the current face the while loop point is on
// comparing the floats once in the beginning so the rest of the ray traversal can compare ints
if (direction.x > 0) {
directionAxes.x = 1;
directionIfReplacements.x = 1;
}
else if (direction.x < 0) {
directionAxes.x = -1;
}
else {
distanceTraveled = maxRayDistance; // just ending the ray for now if one of its direction axes is exactly 0. You'll see a line of black pixels if the player's rotation is zero, but this never happens naturally
directionAxes.x = 0;
}
if (direction.y > 0) {
directionAxes.y = 1;
directionIfReplacements.y = 1;
}
else if (direction.y < 0) {
directionAxes.y = -1;
}
else {
distanceTraveled = maxRayDistance;
directionAxes.y = 0;
}
if (direction.z > 0) {
directionAxes.z = 1;
directionIfReplacements.z = 1;
}
else if (direction.z < 0) {
directionAxes.z = -1;
}
else {
distanceTraveled = maxRayDistance;
directionAxes.z = 0;
}
// calculating the first point
if (playerCameraPosition.x < voxelBufferRowSize &&
playerCameraPosition.x >= 0 &&
playerCameraPosition.y < voxelBufferRowSize &&
playerCameraPosition.y >= 0 &&
playerCameraPosition.z < voxelBufferRowSize &&
playerCameraPosition.z >= 0)
{
int voxelIndex = floor(playerCameraPosition.x) + (floor(playerCameraPosition.z) * voxelBufferRowSize) + (floor(playerCameraPosition.y) * voxelBufferPlaneSize); // the voxel index in the flat array
switch (voxelMaterials[voxelIndex]) {
case 1:
Result[id.xy] = float4(1, 0, 0, 0);
distanceTraveled = maxRayDistance; // to end the while loop
break;
case 2:
Result[id.xy] = float4(0, 1, 0, 0);
distanceTraveled = maxRayDistance;
break;
case 3:
Result[id.xy] = float4(0, 0, 1, 0);
distanceTraveled = maxRayDistance;
break;
default:
break;
}
}
// traversing the ray beyond the first point
while (distanceTraveled < maxRayDistance)
{
switch (face) {
case 1:
distancesXYZ.x = axesUnit.x;
distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
break;
case 2:
distancesXYZ.y = axesUnit.y;
distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
break;
case 3:
distancesXYZ.z = axesUnit.z;
distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
break;
default:
distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
break;
}
face = 0; // 1 = x, 2 = y, 3 = z
float smallestDistance = 1000;
if (distancesXYZ.x < smallestDistance) {
smallestDistance = distancesXYZ.x;
face = 1;
}
if (distancesXYZ.y < smallestDistance) {
smallestDistance = distancesXYZ.y;
face = 2;
}
if (distancesXYZ.z < smallestDistance) {
smallestDistance = distancesXYZ.z;
face = 3;
}
if (smallestDistance == 0) {
break;
}
int3 facesIfReplacement = { 1, 1, 1 };
switch (face) { // directionIfReplacements is 1 when the direction is positive, but I want to subtract, so invert it: subtract 1 when negative, subtract nothing when positive
case 1:
facesIfReplacement.x = 1 - directionIfReplacements.x;
break;
case 2:
facesIfReplacement.y = 1 - directionIfReplacements.y;
break;
case 3:
facesIfReplacement.z = 1 - directionIfReplacements.z;
break;
}
pointHolder += direction * smallestDistance; // the actual ray marching
distanceTraveled += smallestDistance;
int3 voxelIndexXYZ = { -1,-1,-1 }; // the integer coordinates within the buffer
voxelIndexXYZ.x = ceil(pointHolder.x - facesIfReplacement.x);
voxelIndexXYZ.y = ceil(pointHolder.y - facesIfReplacement.y);
voxelIndexXYZ.z = ceil(pointHolder.z - facesIfReplacement.z);
//check if voxelIndexXYZ is within bounds of the voxel buffer before indexing the array
if (voxelIndexXYZ.x < voxelBufferRowSize &&
voxelIndexXYZ.x >= 0 &&
voxelIndexXYZ.y < voxelBufferRowSize &&
voxelIndexXYZ.y >= 0 &&
voxelIndexXYZ.z < voxelBufferRowSize &&
voxelIndexXYZ.z >= 0)
{
int voxelIndex = voxelIndexXYZ.x + (voxelIndexXYZ.z * voxelBufferRowSize) + (voxelIndexXYZ.y * voxelBufferPlaneSize); // the voxel index in the flat array
switch (voxelMaterials[voxelIndex]) {
case 1:
Result[id.xy] = float4(1, 0, 0, 0) * (1 - (distanceTraveled / maxRayDistance));
distanceTraveled = maxRayDistance; // to end the while loop
break;
case 2:
Result[id.xy] = float4(0, 1, 0, 0) * (1 - (distanceTraveled / maxRayDistance));
distanceTraveled = maxRayDistance;
break;
case 3:
Result[id.xy] = float4(0, 0, 1, 0) * (1 - (distanceTraveled / maxRayDistance));
distanceTraveled = maxRayDistance;
break;
}
}
else {
break; // should stay in the actual game implementation, where the player will always be inside the voxel buffer
}
}
}
Depending on the voxel data you give it, it produces a rendered image of the voxel scene (screenshot omitted).
And here is the shader after "optimizing" it and taking out all branching or diverging conditional statements (I think):
#pragma kernel CSMain
RWTexture2D<float4> Result; // the actual array of pixels the player sees
float4 resultHolder;
const float width; // in pixels
const float height;
const Buffer<int> voxelMaterials; // for now just getting a flat voxel array
const Buffer<float4> voxelColors;
const int voxelBufferRowSize;
const int voxelBufferPlaneSize;
const int voxelBufferSize;
const Buffer<float3> rayDirections; // I'm now actually using it as points instead of directions
const float maxRayDistance;
const float3 playerCameraPosition; // relative to the voxelData, ie the first voxel's bottom, back, left corner position, no negative coordinates
const float3 playerWorldForward;
const float3 playerWorldRight;
const float3 playerWorldUp;
[numthreads(16, 16, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
resultHolder = float4(0, 0, 0, 0); // setting the pixel to black by default
float3 pointHolder = playerCameraPosition; // initializing the first point to the player's position
const float3 p = rayDirections[id.x + (id.y * width)]; // vector transformation getting the world space directions of the rays relative to the player
const float3 u1 = p.x * playerWorldRight;
const float3 u2 = p.y * playerWorldUp;
const float3 u3 = p.z * playerWorldForward;
const float3 direction = u1 + u2 + u3; // the transformed ray direction in world space
const bool anyDir0 = direction.x == 0 || direction.y == 0 || direction.z == 0; // preventing a division by zero
float distanceTraveled = maxRayDistance * anyDir0;
const float3 nonZeroDirection = { // to prevent a division by zero
direction.x + (1 * anyDir0),
direction.y + (1 * anyDir0),
direction.z + (1 * anyDir0)
};
const float3 axesUnits = { // the distances if the axis is an integer
1.0f / abs(nonZeroDirection.x),
1.0f / abs(nonZeroDirection.y),
1.0f / abs(nonZeroDirection.z)
};
const bool3 isDirectionPositiveOr0 = {
direction.x >= 0,
direction.y >= 0,
direction.z >= 0
};
while (distanceTraveled < maxRayDistance)
{
const bool3 pointIsAnInteger = {
(int)pointHolder.x == pointHolder.x,
(int)pointHolder.y == pointHolder.y,
(int)pointHolder.z == pointHolder.z
};
const float3 distancesXYZ = {
((floor(pointHolder.x + isDirectionPositiveOr0.x) - pointHolder.x) / direction.x * !pointIsAnInteger.x) + (axesUnits.x * pointIsAnInteger.x),
((floor(pointHolder.y + isDirectionPositiveOr0.y) - pointHolder.y) / direction.y * !pointIsAnInteger.y) + (axesUnits.y * pointIsAnInteger.y),
((floor(pointHolder.z + isDirectionPositiveOr0.z) - pointHolder.z) / direction.z * !pointIsAnInteger.z) + (axesUnits.z * pointIsAnInteger.z)
};
float smallestDistance = min(distancesXYZ.x, distancesXYZ.y);
smallestDistance = min(smallestDistance, distancesXYZ.z);
pointHolder += direction * smallestDistance;
distanceTraveled += smallestDistance;
const int3 voxelIndexXYZ = {
floor(pointHolder.x) - (!isDirectionPositiveOr0.x && (int)pointHolder.x == pointHolder.x),
floor(pointHolder.y) - (!isDirectionPositiveOr0.y && (int)pointHolder.y == pointHolder.y),
floor(pointHolder.z) - (!isDirectionPositiveOr0.z && (int)pointHolder.z == pointHolder.z)
};
const bool inBounds = (voxelIndexXYZ.x < voxelBufferRowSize && voxelIndexXYZ.x >= 0) && (voxelIndexXYZ.y < voxelBufferRowSize && voxelIndexXYZ.y >= 0) && (voxelIndexXYZ.z < voxelBufferRowSize && voxelIndexXYZ.z >= 0);
const int voxelIndexFlat = (voxelIndexXYZ.x + (voxelIndexXYZ.z * voxelBufferRowSize) + (voxelIndexXYZ.y * voxelBufferPlaneSize)) * inBounds; // meaning the voxel at 0,0,0 will always be empty and act as our index-out-of-range prevention
if (voxelMaterials[voxelIndexFlat] > 0) {
resultHolder = voxelColors[voxelMaterials[voxelIndexFlat]] * (1 - (distanceTraveled / maxRayDistance));
break;
}
if (!inBounds) break;
}
Result[id.xy] = resultHolder;
}

A compute shader is what it is: a program that runs on the GPU, whether in Vulkan or in Unity, so you are computing in parallel either way. The point of Vulkan, however, is that it gives you more control over the commands being executed on the GPU: synchronization, memory, and so on. So it's not necessarily going to be faster in Vulkan than in Unity. What you should do is actually optimise your shaders.
Also, the main problem with if/else is divergence within groups of invocations which operate in lock-step. If you can avoid that, the performance impact will be far lessened; the sketch below illustrates the idea.
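As a rough illustration (plain C++ here, though the same idea carries straight over to HLSL), an if/else chain over simple values can usually be replaced by selects that every invocation executes uniformly:
#include <cmath>
// Hedged sketch: the per-axis distance to the next grid boundary for a
// DDA-style traversal, written without divergent control flow. A ternary
// over simple values typically compiles to a select instruction, not a branch.
inline float boundaryDistance(float pos, float dir) {
    float step = (dir > 0.0f) ? 1.0f : 0.0f; // select, not a branch
    float next = std::floor(pos + step);     // next integer grid boundary
    return (next - pos) / dir;               // distance along the ray to reach it
}
// Note: this returns 0 when pos sits exactly on a boundary and dir < 0; the
// "optimized" shader above handles that case with its pointIsAnInteger mask.
This only pays off when both sides of the conditional are cheap; large divergent blocks still cost you both paths.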
If you still want to do all that in Vulkan...
Since you are not going to do any triangle rasterisation, you probably won't need the render passes or graphics pipelines that the tutorials generally show. Instead you are going to need a compute pipeline. Those are far simpler than graphics pipelines, requiring only one shader and the pipeline layout (the inputs and outputs are bound via descriptor sets).
You just need to pass the swapchain image to the compute shader as a storage image in a descriptor (and of course any other data your shader may need; everything is passed via descriptors). For that you need to specify VK_IMAGE_USAGE_STORAGE_BIT in your swapchain creation structure.
Then, in your command buffer, you bind the descriptor sets with the image and other data, bind the compute pipeline, and dispatch it much as you do in Unity. Swapchain presentation and command buffer submission work the same way as in the graphics tutorials.
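For orientation, here is a heavily abbreviated C++ sketch of that setup. It is a sketch only: error checks, the descriptor pool/set allocation, and the image layout transitions are omitted, and device, cmd, descriptorSet, computeShaderModule, width and height are placeholders for objects created elsewhere.
// One binding: the swapchain image as a storage image (binding = 0 in the shader).
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo setLayoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
setLayoutInfo.bindingCount = 1;
setLayoutInfo.pBindings = &binding;
VkDescriptorSetLayout setLayout;
vkCreateDescriptorSetLayout(device, &setLayoutInfo, nullptr, &setLayout);
// The pipeline layout is the only other piece a compute pipeline needs.
VkPipelineLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutInfo.setLayoutCount = 1;
layoutInfo.pSetLayouts = &setLayout;
VkPipelineLayout pipelineLayout;
vkCreatePipelineLayout(device, &layoutInfo, nullptr, &pipelineLayout);
VkComputePipelineCreateInfo pipelineInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
pipelineInfo.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
pipelineInfo.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
pipelineInfo.stage.module = computeShaderModule; // your compiled SPIR-V compute shader
pipelineInfo.stage.pName = "main";
pipelineInfo.layout = pipelineLayout;
VkPipeline pipeline;
vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &pipeline);
// Per frame, after acquiring the swapchain image, transitioning it to
// VK_IMAGE_LAYOUT_GENERAL, and pointing the descriptor set at its image view:
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
vkCmdDispatch(cmd, (width + 7) / 8, (height + 7) / 8, 1); // matches [numthreads(8,8,1)]
Before presenting, the image then has to be transitioned from VK_IMAGE_LAYOUT_GENERAL to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR.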

Related

Camera slips through the terrain limit (the given edges of the terrain) every time I rotate the camera - how can I fix that?

I already tried to clamp it with Mathf or hardcoding it, but that didn't help; it just slips through every time I rotate the camera at the edge of the map (min/max hsp, min/max vsp), the given edges of the terrain.
I'm out of ideas and have searched for days for a similar problem in the hope of spotting my mistake, and now I'm here. Hopefully someone can help me and show me the mistake I made. I want to learn so badly. You can see the code I used here:
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
public class CameraMovementScript : MonoBehaviour
{
float speed = 1f;
float zoomSpeed = 10f;
float rotateSpeed = 50f;
float maxHeight = 200f;
float minHeight = 20f;
float minHsp = 10f;
float maxHsp = 750f;
float minVsp = 10f;
float maxVsp = 750f;
Vector2 p1;
Vector2 p2;
// Start is called before the first frame update
void Start()
{
}
// Update is called once per frame
void Update()
{
if (Input.GetKey(KeyCode.LeftShift))
{
speed = 1f;
zoomSpeed = 2000f;
}
else
{
speed = 0.5f;
zoomSpeed = 1550f;
}
float hsp = transform.position.y * speed * Input.GetAxis("Horizontal") * Time.deltaTime;
float vsp = transform.position.y * speed * Input.GetAxis("Vertical") * Time.deltaTime;
float scrollSP = Mathf.Log(transform.position.y) * -zoomSpeed * Input.GetAxis("Mouse ScrollWheel") * Time.deltaTime;
if ((transform.position.y >= maxHeight) && (scrollSP > 0))
{
scrollSP = 0;
}
else if ((transform.position.y <= minHeight) && (scrollSP <0))
{
scrollSP = 0;
}
if((transform.position.y + scrollSP) > maxHeight)
{
scrollSP = maxHeight - transform.position.y;
}
else if((transform.position.y + scrollSP) < minHeight)
{
scrollSP = minHeight - transform.position.y;
}
if ((transform.position.x >= maxHsp) && (hsp > 0))
{
hsp = 0;
}
else if ((transform.position.x <= minHsp) && (hsp < 0))
{
hsp = 0;
}
if ((transform.position.x + hsp) > maxHsp)
{
hsp = maxHsp - transform.position.x;
}
else if ((transform.position.x + hsp) < minHsp)
{
hsp = minHsp - transform.position.x;
}
if ((transform.position.z >= maxVsp) && (vsp > 0))
{
vsp = 0;
}
else if ((transform.position.z <= minVsp) && (vsp < 0))
{
vsp = 0;
}
if ((transform.position.z + vsp) > maxVsp)
{
vsp = maxVsp - transform.position.z;
}
else if ((transform.position.z + vsp) < minVsp)
{
vsp = minVsp - transform.position.z;
}
Vector3 verticalMove = new Vector3(0,scrollSP,0);
Vector3 lateralMove = hsp * transform.right;
Vector3 forwardMove = transform.forward;
forwardMove.y = 0;
forwardMove.Normalize();
forwardMove *= vsp;
Vector3 move = verticalMove + lateralMove + forwardMove;
transform.position += move;
getCameraRotation();
}
void getCameraRotation()
{
if(Input.GetMouseButtonDown(2))
{
p1 = Input.mousePosition;
}
if(Input.GetMouseButton(2))
{
p2 = Input.mousePosition;
float dx = (p2 - p1).x * rotateSpeed * Time.deltaTime;
float dy = (p2 - p1).y * rotateSpeed * Time.deltaTime;
transform.rotation *= Quaternion.Euler(new Vector3(0,dx,0));
transform.GetChild(0).transform.rotation *= Quaternion.Euler(new Vector3(-dy,0,0));
p1 = p2;
}
}
}
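A note on the clamping approach above (a guess at the cause, not a verified fix): once the camera is rotated, lateralMove (along transform.right) and forwardMove each contribute to both x and z, so gating hsp against transform.position.x and vsp against transform.position.z no longer covers every way the camera can drift across the bounds. The usual remedy is to apply the move first and clamp the final position; a minimal C++-style sketch of the idea (in Unity this maps to Mathf.Clamp on transform.position):
#include <algorithm>
struct Vec3 { float x, y, z; };
// Clamp the camera's final position into the terrain bounds after the move,
// instead of zeroing the per-axis speeds before it.
Vec3 clampToBounds(Vec3 pos, float minX, float maxX, float minY, float maxY, float minZ, float maxZ) {
    pos.x = std::clamp(pos.x, minX, maxX);
    pos.y = std::clamp(pos.y, minY, maxY);
    pos.z = std::clamp(pos.z, minZ, maxZ);
    return pos;
}
// usage: position = clampToBounds(position + move, minHsp, maxHsp, minHeight, maxHeight, minVsp, maxVsp);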

mNumWeights corresponding to mNumVertices?

I have one issue left with Assimp DirectX C++ animation with a skeleton.
for (UINT m = 0; m < currentMesh->mBones[k]->mNumWeights; m++) // vertices affected by this bone
{
vertexVector[k].joints.x = currentMesh->mBones[k]->mWeights[m].mVertexId;
That code visits, inside an iteration, all vertices affected by bone k.
All of these vertices must have the same vert ID since they are all affected by the same bone/joint.
The problem is, I need to make a list of every vertex and a list of every face index, where I store position, UV, normal, etc.
The list of all vertices is obviously not in the same order as the lists of vertices affected by each bone.
So how can I combine these lists?
"vertexVector" etc. is an example of a list with joint info corresponding to a vertex ID.
It has room for more entries and another variable for the weight.
But that list obviously doesn't work.
What am I doing wrong with Assimp? I hope this was a clear post.
Update: this is how I build the matrices. I don't know what is wrong.
void jointTransform(float timeInSeconds, std::vector<DirectX::XMMATRIX>& transformM, aiAnimation* ani, UINT nrOfJoints, std::vector<joints>& jointInfo, const aiScene* scenePtr)
{
DirectX::XMMATRIX iD = DirectX::XMMatrixIdentity();
float ticksPerSecond = (float)ani->mTicksPerSecond;
if (ticksPerSecond == 0)
{
ticksPerSecond = 30;
}
float timeInTicks = timeInSeconds * ticksPerSecond;
float animationTime = fmod(timeInTicks, (float)ani->mDuration);
readNodeHeiarchy(animationTime, scenePtr->mRootNode, iD, jointInfo, ani,
scenePtr);
transformM.resize(nrOfJoints);
for (UINT i = 0; i < transformM.size(); i++)
{
transformM[i] = jointInfo[i].transformFinal;
}
}
void readNodeHeiarchy(float time, const aiNode* node, DirectX::XMMATRIX parentMat, std::vector<joints>& jointInfo, aiAnimation* ani, const aiScene* scenePtr)
{
std::string nodeNameString = node->mName.data;
// Create a parent transform from the node, which is then passed on as the parent matrix; the first time around it is the identity matrix.
aiMatrix4x4 nodeTransform = node->mTransformation;
DirectX::XMMATRIX combined;
combined = DirectX::XMMatrixSet(nodeTransform.a1, nodeTransform.a2,
nodeTransform.a3, nodeTransform.a4,
nodeTransform.b1, nodeTransform.b2, nodeTransform.b3, nodeTransform.b4,
nodeTransform.c1, nodeTransform.c2, nodeTransform.c3, nodeTransform.c4,
nodeTransform.d1, nodeTransform.d2, nodeTransform.d3,
nodeTransform.d4);
const aiNodeAnim* joint = nullptr;
// Check whether the node is a bone.
for (UINT i = 0; i < ani->mNumChannels; i++)
{
if (nodeNameString == ani->mChannels[i]->mNodeName.data)
{
joint = ani->mChannels[i];
}
}
DirectX::XMMATRIX globalTransform = DirectX::XMMatrixIdentity();
// If it is a bone, joint is no longer nullptr; it becomes that bone.
if (joint)
{
DirectX::XMMATRIX S;
DirectX::XMMATRIX R;
DirectX::XMMATRIX T;
//scale
aiVector3D scaleV;
calcLerpScale(scaleV, time, joint);
S = DirectX::XMMatrixScaling(scaleV.x, scaleV.y, scaleV.z);
//rotate
aiQuaternion rotationQ;
calcLerpRot(rotationQ, time, joint);
DirectX::XMVECTOR q;
q = DirectX::XMVectorSet(rotationQ.x, rotationQ.y, rotationQ.z,
rotationQ.w);
R = DirectX::XMMatrixRotationQuaternion(q);
//translate
aiVector3D transV;
calcLerpTrans(transV, time, joint);
T = DirectX::XMMatrixTranslation(transV.x, transV.y, transV.z);
combined = S * R * T;
globalTransform = combined * parentMat;
}
//DirectX::XMMATRIX globalTransform = combined * parentMat;
//if (jointInfo[jointInfo.size() - 1].name.C_Str() != nodeNameString)
//{
for (UINT i = 0; i < jointInfo.size(); i++)
{
if (jointInfo[i].name.C_Str() == nodeNameString)
{
OutputDebugStringA("\n");
OutputDebugStringA(jointInfo[i].name.C_Str());
OutputDebugStringA("\n");
aiMatrix4x4 off = jointInfo[i].offsetM;
DirectX::XMMATRIX offset;
offset = DirectX::XMMatrixSet(off.a1, off.a2, off.a3, off.a4,
off.b1, off.b2, off.b3, off.b4,
off.c1, off.c2, off.c3, off.c4,
off.d1, off.d2, off.d3, off.d4);
DirectX::XMMATRIX rootMInv;
aiMatrix4x4 rootInv = scenePtr->mRootNode->mTransformation.Inverse();
rootMInv = DirectX::XMMatrixSet(rootInv.a1, rootInv.a2,
rootInv.a3, rootInv.a4,
rootInv.b1, rootInv.b2, rootInv.b3, rootInv.b4,
rootInv.c1, rootInv.c2, rootInv.c3, rootInv.c4,
rootInv.d1, rootInv.d2, rootInv.d3, rootInv.d4);
jointInfo[i].transformFinal = offset * globalTransform *
rootMInv;
break;
}
}
//}
for (UINT i = 0; i < node->mNumChildren; i++)
{
readNodeHeiarchy(time, node->mChildren[i], globalTransform, jointInfo,
ani, scenePtr);
}
}
void calcLerpScale(aiVector3D& scale, float aniTime, const aiNodeAnim* joint)
{
if (joint->mNumScalingKeys == 1)
{
scale = joint->mScalingKeys[0].mValue;
return;
}
UINT scaleInd = findIndexS(aniTime, joint);
UINT nextScale = scaleInd + 1;
assert(nextScale < joint->mNumScalingKeys);
float deltaTime = (float)joint->mScalingKeys[nextScale].mTime -
(float)joint->mScalingKeys[scaleInd].mTime;
float factor = (aniTime - (float)joint->mScalingKeys[scaleInd].mTime) /
deltaTime;
assert(factor >= 0.0f && factor <= 1.0f);
const aiVector3D& startScaleV = joint->mScalingKeys[scaleInd].mValue;
const aiVector3D& endScaleV = joint->mScalingKeys[nextScale].mValue;
//interpolate
aiVector3D Delta = endScaleV - startScaleV; // the length
scale = startScaleV + (factor * Delta); // step part of the way from start to end, depending on the factor
scale.Normalize();
}
void calcLerpRot(aiQuaternion& rotation, float aniTime, const aiNodeAnim* joint)
{
if (joint->mNumRotationKeys == 1)
{
rotation = joint->mRotationKeys[0].mValue;
return;
}
UINT rotIndex = findIndexRot(aniTime, joint);
UINT nextRot = (rotIndex + 1);
assert(nextRot < joint->mNumRotationKeys);
float deltaTime = (float)joint->mRotationKeys[nextRot].mTime -
(float)joint->mRotationKeys[rotIndex].mTime;
float factor = (aniTime - (float)joint->mRotationKeys[rotIndex].mTime) /
deltaTime;
assert(factor >= 0.0f && factor <= 1.0f);
const aiQuaternion& StartRotationQ = joint->mRotationKeys[rotIndex].mValue;
const aiQuaternion& EndRotationQ = joint->mRotationKeys[nextRot].mValue;
aiQuaternion::Interpolate(rotation, StartRotationQ, EndRotationQ, factor);
rotation.Normalize();
}
void calcLerpTrans(aiVector3D& translation, float aniTime, const aiNodeAnim* joint)
{
if (joint->mNumPositionKeys == 1)
{
translation = joint->mPositionKeys[0].mValue;
return;
}
UINT transIndex = findIndexT(aniTime, joint);
UINT nextTrans = (transIndex + 1);
assert(nextTrans < joint->mNumPositionKeys);
float deltaTime = (float)joint->mPositionKeys[nextTrans].mTime -
(float)joint->mPositionKeys[transIndex].mTime;
float factor = (aniTime - (float)joint->mPositionKeys[transIndex].mTime) /
deltaTime;
assert(factor >= 0.0f && factor <= 1.0f);
const aiVector3D& startTransV = joint->mPositionKeys[transIndex].mValue;
const aiVector3D& endTransV = joint->mPositionKeys[nextTrans].mValue;
//interpolate
aiVector3D Delta = endTransV - startTransV;
translation = startTransV + (factor * Delta);
translation.Normalize();
}
UINT findIndexRot(float aniTime, const aiNodeAnim* joint)
{
assert(joint->mNumRotationKeys > 0);
for (UINT i = 0; i < joint->mNumRotationKeys - 1; i++)
{
if (aniTime < (float)joint->mRotationKeys[i + 1].mTime)
{
return i;
}
}
assert(0);
return 0; // unreachable when aniTime lies within the animation's duration
}
Not sure what you mean by "All of these vertices must have the same vert ID": the vertex IDs of the k:th bone, according to mBones[k]->mWeights[..].mVertexId, are indices of the vertices influenced by this bone, and they are going to be different (otherwise there would be either redundancy or conflict).
You probably want to have bone indices and bone weights as part of the vertex definition for easy handling in a shader. Something like
struct vertex {
vec3 pos;
vec3 normal;
float bone_weights[N]; // weights of bones influencing this vertex
unsigned bone_indices[N]; // indices of bones influencing this vertex
};
std::vector<vertex> mesh_vertices;
Where N is the maximum number of bones influencing a vertex. A common value is four, but this depends on the mesh you are importing.
Based on your example, a rough draft could be something like this:
// k:th bone of bones in currentMesh
for (UINT m = 0; m < currentMesh->mBones[k]->mNumWeights; m++)
{
float bone_weight = currentMesh->mBones[k]->mWeights[m].mWeight;
unsigned vertex_index = currentMesh->mBones[k]->mWeights[m].mVertexId;
mesh_vertices[vertex_index].bone_weights[m] = bone_weight;
mesh_vertices[vertex_index].bone_indices[m] = k;
}
Here we've assumed that mNumWeights = N, but this needs to be checked, as mentioned.
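One more wrinkle with the draft: m indexes the bone's weight list, not a slot inside the vertex, so in practice you want a per-vertex influence counter. A rough sketch under the same assumptions (influence_count is an illustrative helper, not an Assimp name):
// One influence counter per vertex, so each (bone, weight) pair lands in the
// vertex's next free slot instead of in slot m.
std::vector<unsigned> influence_count(mesh_vertices.size(), 0);
for (UINT k = 0; k < currentMesh->mNumBones; k++)
{
    const aiBone* bone = currentMesh->mBones[k];
    for (UINT m = 0; m < bone->mNumWeights; m++)
    {
        unsigned vertex_index = bone->mWeights[m].mVertexId;
        unsigned slot = influence_count[vertex_index]++;
        if (slot < N) // drop (or renormalize) influences beyond the first N
        {
            mesh_vertices[vertex_index].bone_weights[slot] = bone->mWeights[m].mWeight;
            mesh_vertices[vertex_index].bone_indices[slot] = k;
        }
    }
}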

In Vulkan: I want to save a depth image to a file, but I always get a broken depth image

I want to save a depth image from the framebuffer render result.
1. Create a staging buffer to hold the image data.
2. Use vkCmdCopyImageToBuffer to copy the depth image into the staging buffer.
3. Use vkMapMemory to map the staging buffer's memory into host memory.
4. Read the host memory and write the depth data to a file.
But I always get a broken depth image, and I don't know where I went wrong.
(The original post attached a screenshot of the application window, the buggy depth image file, and a link to the source file.)
save depth image function:
VkDeviceSize size = WIDTH * HEIGHT * 4;
VkBuffer dstBuffer;
VkDeviceMemory dstMemory;
createBuffer(
size,
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
dstBuffer,
dstMemory);
VkCommandBuffer copyCmd = beginSingleTimeCommands();
// depth format -> VK_FORMAT_D32_SFLOAT_S8_UINT
VkBufferImageCopy region = {};
region.bufferOffset = 0;
region.bufferImageHeight = 0;
region.bufferRowLength = 0;
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
region.imageSubresource.mipLevel = 0;
region.imageSubresource.baseArrayLayer = 0;
region.imageSubresource.layerCount = 1;
region.imageOffset = VkOffset3D{ 0, 0, 0 };
region.imageExtent = VkExtent3D{ swapChainExtent.width, swapChainExtent.height, 1};
vkCmdCopyImageToBuffer(
copyCmd,
depthImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
dstBuffer,
1,
&region
);
endSingleTimeCommands(copyCmd);
// Map image memory so we can start copying from it
void *data;
vkMapMemory(device, dstMemory, 0, size, 0, &data);
std::ofstream file(path, std::ios::out | std::ios::binary);
// ppm header
file << "P6\n" << WIDTH << "\n" << HEIGHT << "\n" << 255 << "\n";
float *row = (float*)data;
auto size_v = WIDTH * HEIGHT;
for (uint32_t y = 0; y < size_v; y++) {
file.write((char*)row + 1, 1);
file.write((char*)row + 1, 1);
file.write((char*)row + 1, 1);
row++;
}
file.close();
// Clean up resources
vkUnmapMemory(device, dstMemory);
vkFreeMemory(device, dstMemory, nullptr);
vkDestroyBuffer(device, dstBuffer, nullptr);
I hope someone can drag me out of this. Thanks!
Assuming you've done all the transfer work correctly, your mapped data is basically an array of floats. This is reflected in your code by this line:
float *row = (float*)data;
However, when you actually write out the file you're treating the data like bytes...
file.write((char*)row + 1, 1);
So each iteration you're writing out one byte of a 32-bit float, three times. What you need is some function to convert from the float to a color value.
Assuming the depth value is normalized (I can't remember off the top of my head whether this is the case, or if it's dependent on the pipeline or framebuffer setup) and if you just want greyscale, you could use
uint8_t map(float f) {
return (uint8_t)(f * 255.0f);
}
and inside your file writing loop you'd do something like
uint8_t grey = map(*row);
file.write((char*)&grey, 1);
file.write((char*)&grey, 1);
file.write((char*)&grey, 1);
++row;
Alternatively, if you want some sort of color gradient for easier visualization, you'd want a more complex mapping function...
vec3 colorWheel(float normalizedHue) {
float v = normalizedHue * 6.f;
if (v < 0.f) {
return vec3(1.f, 0.f, 0.f);
} else if (v < 1.f) {
return vec3(1.f, v, 0.f);
} else if (v < 2.f) {
return vec3(1.f - (v-1.f), 1.f, 0.f);
} else if (v < 3.f) {
return vec3(0.f, 1.f, (v-2.f));
} else if (v < 4.f) {
return vec3(0.f, 1.f - (v-3.f), 1.f );
} else if (v < 5.f) {
return vec3((v-4.f), 0.f, 1.f );
} else if (v < 6.f) {
return vec3(1.f, 0.f, 1.f - (v-5.f));
} else {
return vec3(1.f, 0.f, 0.f);
}
}
and in your file output loop...
vec3 color = colorWheel(*row);
uint8_t r = map(color.r);
uint8_t g = map(color.g);
uint8_t b = map(color.b);
file.write((char*)&r, 1);
file.write((char*)&g, 1);
file.write((char*)&b, 1);
++row;
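One more thing worth checking, since it isn't visible in the posted snippet: vkCmdCopyImageToBuffer requires the depth image to actually be in VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL at that point. A hedged sketch of the barrier you would record into copyCmd before the copy, assuming the image was last written as a depth/stencil attachment:
VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = depthImage;
// For a combined D32_SFLOAT_S8_UINT image, the layout transition must cover both aspects.
barrier.subresourceRange = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(copyCmd,
    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
    0, 0, nullptr, 0, nullptr, 1, &barrier);
Note also that copying only the depth aspect of that format yields tightly packed 32-bit floats, which matches the WIDTH * HEIGHT * 4 buffer size.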

'Grenade' projection based on angle + bounce

I'm having some trouble synthesizing an advanced object projection formula. I have already figured out a few basic physics simulation formulas, such as:
Velocity of object:
x += cos(angle);
y += sin(angle);
*where the angle can be obtained either from the mouse position or with tan(... target and initial values)
but that only travels straight based on the angle.
Gravity:
Yvelocity = Yvelocity - gravity;
if(!isHitPlatform) {
Obj.y += YVelocity
}
Bounce:
// No point if we've not been sized...
if (height > 0) {
// Are we bouncing...
if (bounce) {
// Add the vDelta to the yPos
// vDelta may be positive or negative, allowing
// for both up and down movement...
yPos += vDelta;
// Add the gravity to the vDelta, this will slow down
// the upward movement and speed up the downward movement...
// You may wish to place a max speed to this
vDelta += gDelta;
// If the sprite is not on the ground...
if (yPos + SPRITE_HEIGHT >= height) {
// Seat the sprite on the ground
yPos = height - SPRITE_HEIGHT;
// If the re-bound delta is 0 or more then we've stopped
// bouncing...
if (rbDelta >= 0) {
// Stop bouncing...
bounce = false;
} else {
// Add the re-bound degradation delta to the re-bound delta
rbDelta += rbDegDelta;
// Set the vDelta...
vDelta = rbDelta;
}
}
}
}
I need help combining these three formulas into an efficient, lightweight algorithm that projects an object in an arc determined by the angle, yet lets it bounce a few times before coming to a stop, all with an acceptable amount of discontinuity between points. *Note: having the grenade follow an f(x) = -x^2 formula creates a larger jump discontinuity as the slope increases, forcing you to reverse the formula to find the x = ±y value (to determine whether + or -, check the bounds).
something like:
class granade
{
private static final double dt = 0.1; // or similar
private static final double gravity = 9.81; // assumed value; this field is missing from the original snippet
private double speedx;
private double speedy;
private double positionx;
private double positiony;
public granade(double v, double angle)
{
speedx = v * Math.cos(angle);
speedy = v * Math.sin(angle);
positionx = 0;
positiony = 0;
}
public void nextframe()
{
// update speed: v += a*dt
speedy -= gravity* dt;
// update position: pos += v*dt
positionx += speedx * dt;
double newpositiony = positiony + speedy*dt;
// bounce if hit ground
if (newpositiony > 0)
positiony = newpositiony;
else {
// bounce vertically
speedy *= -1;
positiony = -newpositiony;
}
}
public void draw() { /* TODO */ }
}
OT: avoid Math.atan(y/x), use Math.atan2(y, x)
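To get the "bounce a few times before coming to a stop" behaviour, damp the velocity at each bounce with a restitution factor. A small C++ sketch of the same integrator with that added (the constants are illustrative, not tuned):
#include <cmath>
struct Grenade {
    static constexpr double dt = 0.1;          // timestep, as above
    static constexpr double gravity = 9.81;    // assumed units: m/s^2
    static constexpr double restitution = 0.6; // fraction of speed kept per bounce
    double vx, vy, x = 0.0, y = 0.0;
    Grenade(double v, double angle) : vx(v * std::cos(angle)), vy(v * std::sin(angle)) {}
    void nextFrame() {
        vy -= gravity * dt;          // integrate acceleration
        x += vx * dt;                // integrate velocity
        double newY = y + vy * dt;
        if (newY > 0.0) {
            y = newY;                // still in the air
        } else {
            vy = -vy * restitution;  // bounce with energy loss
            vx *= restitution;       // crude contact friction
            y = -newY * restitution;
            if (std::abs(vy) < 0.5) vy = 0.0; // rest threshold kills the residual jitter
        }
    }
};
Each bounce scales the speed by the restitution factor, so the arcs shrink geometrically and the rest threshold stops the simulation cleanly.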

Example of using Audio Queue Services

I am seeking an example of using Audio Queue Services.
I would like to create a sound using a mathematical equation and then hear it.
Here's my code for generating sound from a function. I'm assuming you know how to use AudioQueue services, set up an AudioSession, and properly start and stop an audio output queue.
Here's a snippet for setting up and starting an output AudioQueue:
// Get the preferred sample rate (8,000 Hz on iPhone, 44,100 Hz on iPod touch)
size = sizeof(sampleRate);
err = AudioSessionGetProperty (kAudioSessionProperty_CurrentHardwareSampleRate, &size, &sampleRate);
if (err != noErr) NSLog(#"AudioSessionGetProperty(kAudioSessionProperty_CurrentHardwareSampleRate) error: %d", err);
//NSLog (#"Current hardware sample rate: %1.0f", sampleRate);
BOOL isHighSampleRate = (sampleRate > 16000);
int bufferByteSize;
AudioQueueBufferRef buffer;
// Set up stream format fields
AudioStreamBasicDescription streamFormat;
streamFormat.mSampleRate = sampleRate;
streamFormat.mFormatID = kAudioFormatLinearPCM;
streamFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
streamFormat.mBitsPerChannel = 16;
streamFormat.mChannelsPerFrame = 1;
streamFormat.mBytesPerPacket = 2 * streamFormat.mChannelsPerFrame;
streamFormat.mBytesPerFrame = 2 * streamFormat.mChannelsPerFrame;
streamFormat.mFramesPerPacket = 1;
streamFormat.mReserved = 0;
// New output queue ---- PLAYBACK ----
if (isPlaying == NO) {
err = AudioQueueNewOutput (&streamFormat, AudioEngineOutputBufferCallback, self, nil, nil, 0, &outputQueue);
if (err != noErr) NSLog(#"AudioQueueNewOutput() error: %d", err);
// Enqueue buffers
//outputFrequency = 0.0;
outputBuffersToRewrite = 3;
bufferByteSize = (sampleRate > 16000)? 2176 : 512; // 40.5 Hz : 31.25 Hz
for (i=0; i<3; i++) {
err = AudioQueueAllocateBuffer (outputQueue, bufferByteSize, &buffer);
if (err == noErr) {
[self generateTone: buffer];
err = AudioQueueEnqueueBuffer (outputQueue, buffer, 0, nil);
if (err != noErr) NSLog(#"AudioQueueEnqueueBuffer() error: %d", err);
} else {
NSLog(#"AudioQueueAllocateBuffer() error: %d", err);
return;
}
}
// Start playback
isPlaying = YES;
err = AudioQueueStart(outputQueue, nil);
if (err != noErr) { NSLog(#"AudioQueueStart() error: %d", err); isPlaying= NO; return; }
} else {
NSLog (#"Error: audio is already playing back.");
}
Here's the part that generates the tone:
// AudioQueue output queue callback.
void AudioEngineOutputBufferCallback (void *inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer) {
AudioEngine *engine = (AudioEngine*) inUserData;
[engine processOutputBuffer:inBuffer queue:inAQ];
}
- (void) processOutputBuffer: (AudioQueueBufferRef) buffer queue:(AudioQueueRef) queue {
OSStatus err;
if (isPlaying == YES) {
[outputLock lock];
if (outputBuffersToRewrite > 0) {
outputBuffersToRewrite--;
[self generateTone:buffer];
}
err = AudioQueueEnqueueBuffer(queue, buffer, 0, NULL);
if (err == 560030580) { // Queue is not active due to Music being started or other reasons
isPlaying = NO;
} else if (err != noErr) {
NSLog(#"AudioQueueEnqueueBuffer() error %d", err);
}
[outputLock unlock];
} else {
err = AudioQueueStop (queue, NO);
if (err != noErr) NSLog(#"AudioQueueStop() error: %d", err);
}
}
-(void) generateTone: (AudioQueueBufferRef) buffer {
if (outputFrequency == 0.0) {
memset(buffer->mAudioData, 0, buffer->mAudioDataBytesCapacity);
buffer->mAudioDataByteSize = buffer->mAudioDataBytesCapacity;
} else {
// Make the buffer length a multiple of the wavelength for the output frequency.
int sampleCount = buffer->mAudioDataBytesCapacity / sizeof (SInt16);
double bufferLength = sampleCount;
double wavelength = sampleRate / outputFrequency;
double repetitions = floor (bufferLength / wavelength);
if (repetitions > 0.0) {
sampleCount = round (wavelength * repetitions);
}
double x, y;
double sd = 1.0 / sampleRate;
double amp = 0.9;
double max16bit = SHRT_MAX;
int i;
SInt16 *p = buffer->mAudioData;
for (i = 0; i < sampleCount; i++) {
x = i * sd * outputFrequency;
switch (outputWaveform) {
case kSine:
y = sin (x * 2.0 * M_PI);
break;
case kTriangle:
x = fmod (x, 1.0);
if (x < 0.25)
y = x * 4.0; // up 0.0 to 1.0
else if (x < 0.75)
y = (1.0 - x) * 4.0 - 2.0; // down 1.0 to -1.0
else
y = (x - 1.0) * 4.0; // up -1.0 to 0.0
break;
case kSawtooth:
y = 0.8 - fmod (x, 1.0) * 1.8;
break;
case kSquare:
y = (fmod(x, 1.0) < 0.5)? 0.7: -0.7;
break;
default: y = 0; break;
}
p[i] = y * max16bit * amp;
}
buffer->mAudioDataByteSize = sampleCount * sizeof (SInt16);
}
}
Something to watch out for is that your callback will be called on a non-main thread, so you have to practice thread safety with locks, mutexs, or other techniques.
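For instance, the guard the code above gets from outputLock looks roughly like this (a C++ rendition of the same pattern; the names mirror the sample and are illustrative):
#include <mutex>
std::mutex outputLock;          // protects the shared counter below
int outputBuffersToRewrite = 3; // touched by both the main thread and the callback
void onBufferCompleted() {      // stands in for the AudioQueue callback
    std::lock_guard<std::mutex> guard(outputLock); // released automatically on scope exit
    if (outputBuffersToRewrite > 0) {
        --outputBuffersToRewrite;
        // refill the buffer here before re-enqueueing it
    }
}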
This is a C# version of the same sample from @lucius
void SetupAudio ()
{
AudioSession.Initialize ();
AudioSession.Category = AudioSessionCategory.MediaPlayback;
sampleRate = AudioSession.CurrentHardwareSampleRate;
var format = new AudioStreamBasicDescription () {
SampleRate = sampleRate,
Format = AudioFormatType.LinearPCM,
FormatFlags = AudioFormatFlags.LinearPCMIsSignedInteger | AudioFormatFlags.LinearPCMIsPacked,
BitsPerChannel = 16,
ChannelsPerFrame = 1,
BytesPerFrame = 2,
BytesPerPacket = 2,
FramesPerPacket = 1,
};
var queue = new OutputAudioQueue (format);
var bufferByteSize = (sampleRate > 16000)? 2176 : 512; // 40.5 Hz : 31.25 Hz
var buffers = new AudioQueueBuffer* [numBuffers];
for (int i = 0; i < numBuffers; i++){
queue.AllocateBuffer (bufferByteSize, out buffers [i]);
GenerateTone (buffers [i]);
queue.EnqueueBuffer (buffers [i], null);
}
queue.OutputCompleted += (object sender, OutputCompletedEventArgs e) => {
queue.EnqueueBuffer (e.UnsafeBuffer, null);
};
queue.Start ();
}
This is the tone generator:
void GenerateTone (AudioQueueBuffer *buffer)
{
// Make the buffer length a multiple of the wavelength for the output frequency.
uint sampleCount = buffer->AudioDataBytesCapacity / 2;
double bufferLength = sampleCount;
double wavelength = sampleRate / outputFrequency;
double repetitions = Math.Floor (bufferLength / wavelength);
if (repetitions > 0)
sampleCount = (uint)Math.Round (wavelength * repetitions);
double x, y;
double sd = 1.0 / sampleRate;
double amp = 0.9;
double max16bit = Int16.MaxValue;
int i;
short *p = (short *) buffer->AudioData;
for (i = 0; i < sampleCount; i++) {
x = i * sd * outputFrequency;
switch (outputWaveForm) {
case WaveForm.Sine:
y = Math.Sin (x * 2.0 * Math.PI);
break;
case WaveForm.Triangle:
x = x % 1.0;
if (x < 0.25)
y = x * 4.0; // up 0.0 to 1.0
else if (x < 0.75)
y = (1.0 - x) * 4.0 - 2.0; // down 1.0 to -1.0
else
y = (x - 1.0) * 4.0; // up -1.0 to 0.0
break;
case WaveForm.Sawtooth:
y = 0.8 - (x % 1.0) * 1.8;
break;
case WaveForm.Square:
y = ((x % 1.0) < 0.5)? 0.7: -0.7;
break;
default: y = 0; break;
}
p[i] = (short)(y * max16bit * amp);
}
buffer->AudioDataByteSize = sampleCount * 2;
}
You also want these definitions:
enum WaveForm {
Sine, Triangle, Sawtooth, Square
}
WaveForm outputWaveForm;
const float outputFrequency = 220;
High level: use AVAudioPlayer: https://github.com/hollance/AVBufferPlayer
Mid level: audio queues; trailsinthesand.com/exploring-iphone-audio-part-1/ gets you going nicely. NOTE: the http prefix was removed so the old link could stay here; it now redirects to a bad site, so the domain has apparently changed hands.
Low level: alternatively, you can drop down a level and do it with Audio Units: http://cocoawithlove.com/2010/10/ios-tone-generator-introduction-to.html