I'm trying to add a dynamic uniform buffer object to my project, taking this code as an example. I'm new to the API; I built the core of the program by following this tutorial. At the moment I'm stuck on this issue:
validation layer: VkDescriptorSet 0x42bf70000000032[] bound as set #0 encountered the following validation error at vkCmdDrawIndexed() time: Dynamic descriptor in binding #2 index 0 uses buffer 9A90CE000000002B with dynamic offset 768 combined with offset 0 and range 32000 that oversteps the buffer size of 32000.
The error occurs on every iteration of this for loop (from createCommandBuffers()) except the first:
for (uint32_t j = 0; j < OBJECT_INSTANCES; j++)
{
uint32_t dynamicOffset = j * static_cast<uint32_t>(dynamicAlignment);
std::cout << dynamicOffset << std::endl;
vkCmdBindDescriptorSets(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSets[i], 1, &dynamicOffset);
vkCmdDrawIndexed(commandBuffers[i], static_cast<uint32_t>(indices.size()), 1, 0, 0, 0);
std::cout << "done" << std::endl;
}
(dynamicOffset looks correct and is incremented by 256 each time)
I really don't see what I've done wrong. Does someone have an idea?
How the VkDescriptorSets are created:
void createDescriptorSets() {
std::vector<VkDescriptorSetLayout> layouts(swapChainImages.size(), descriptorSetLayout);
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = descriptorPool;
allocInfo.descriptorSetCount = static_cast<uint32_t>(swapChainImages.size());
allocInfo.pSetLayouts = layouts.data();
descriptorSets.resize(swapChainImages.size());
if (vkAllocateDescriptorSets(device, &allocInfo, descriptorSets.data()) != VK_SUCCESS) {
throw std::runtime_error("failed to allocate descriptor sets!");
}
size_t dubo_size = OBJECT_INSTANCES * dynamicAlignment;
std::cout << dubo_size << std::endl;
for (size_t i = 0; i < swapChainImages.size(); i++) {
VkDescriptorBufferInfo uniformBufferInfo = {};
uniformBufferInfo.buffer = uniformBuffers[i];
uniformBufferInfo.offset = 0;
uniformBufferInfo.range = sizeof(UniformBufferObject);
VkDescriptorImageInfo imageInfo{};
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageInfo.imageView = textureImageView;
imageInfo.sampler = textureSampler;
VkDescriptorBufferInfo dynamicUniformBufferInfo = {};
dynamicUniformBufferInfo.buffer = dynamicUniformBuffers[i];
dynamicUniformBufferInfo.offset = 0;
dynamicUniformBufferInfo.range = dubo_size;
std::array<VkWriteDescriptorSet, 3> descriptorWrites{};
descriptorWrites[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[0].dstSet = descriptorSets[i];
descriptorWrites[0].dstBinding = 0;
descriptorWrites[0].dstArrayElement = 0;
descriptorWrites[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorWrites[0].descriptorCount = 1;
descriptorWrites[0].pBufferInfo = &uniformBufferInfo;
descriptorWrites[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[1].dstSet = descriptorSets[i];
descriptorWrites[1].dstBinding = 1;
descriptorWrites[1].dstArrayElement = 0;
descriptorWrites[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptorWrites[1].descriptorCount = 1;
descriptorWrites[1].pImageInfo = &imageInfo;
descriptorWrites[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[2].dstSet = descriptorSets[i];
descriptorWrites[2].dstBinding = 2;
descriptorWrites[2].dstArrayElement = 0;
descriptorWrites[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
descriptorWrites[2].descriptorCount = 1;
descriptorWrites[2].pBufferInfo = &dynamicUniformBufferInfo;
vkUpdateDescriptorSets(device, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr);
}
}
How the buffers are created:
void createDynamicUniformBuffers()
{
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties(physicalDevice, &prop);
size_t minUboAlignment = prop.limits.minUniformBufferOffsetAlignment;
dynamicAlignment = sizeof(glm::mat4);
if (minUboAlignment > 0) {
dynamicAlignment = (dynamicAlignment + minUboAlignment - 1) & ~(minUboAlignment - 1);
}
VkDeviceSize bufferSize = OBJECT_INSTANCES * dynamicAlignment;
uboDataDynamic.model = (glm::mat4*)alignedAlloc(bufferSize, dynamicAlignment);
assert(uboDataDynamic.model);
std::cout << "minUniformBufferOffsetAlignment = " << minUboAlignment << std::endl;
std::cout << "dynamicAlignment = " << dynamicAlignment << std::endl;
dynamicUniformBuffers.resize(swapChainImages.size());
dynamicUniformBuffersMemory.resize(swapChainImages.size());
for (size_t i = 0; i < swapChainImages.size(); i++) {
createBuffer(bufferSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, dynamicUniformBuffers[i], dynamicUniformBuffersMemory[i]);
}
}
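For concreteness, here is the arithmetic with the values from the log below (OBJECT_INSTANCES is not shown in the post; 32000 / 256 implies it is 125):
// sizeof(glm::mat4) = 64, minUboAlignment = 256
// dynamicAlignment  = (64 + 256 - 1) & ~(256 - 1) = 256   (round up to a multiple of 256)
// bufferSize        = OBJECT_INSTANCES * dynamicAlignment = 125 * 256 = 32000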
And finally, a bit more of the terminal output:
minUniformBufferOffsetAlignment = 256
dynamicAlignment = 256
32000
0
done
256
validation layer: VkDescriptorSet 0x42bf70000000032[] bound as set #0 encountered the following validation error at vkCmdDrawIndexed() time: Dynamic descriptor in binding #2 index 0 uses buffer 9A90CE000000002B with dynamic offset 256 combined with offset 0 and range 32000 that oversteps the buffer size of 32000.
done
512
validation layer: VkDescriptorSet 0x42bf70000000032[] bound as set #0 encountered the following validation error at vkCmdDrawIndexed() time: Dynamic descriptor in binding #2 index 0 uses buffer 9A90CE000000002B with dynamic offset 512 combined with offset 0 and range 32000 that oversteps the buffer size of 32000.
done
768
validation layer: VkDescriptorSet 0x42bf70000000032[] bound as set #0 encountered the following validation error at vkCmdDrawIndexed() time: Dynamic descriptor in binding #2 index 0 uses buffer 9A90CE000000002B with dynamic offset 768 combined with offset 0 and range 32000 that oversteps the buffer size of 32000.
done
1024
validation layer: VkDescriptorSet 0x42bf70000000032[] bound as set #0 encountered the following validation error at vkCmdDrawIndexed() time: Dynamic descriptor in binding #2 index 0 uses buffer 9A90CE000000002B with dynamic offset 1024 combined with offset 0 and range 32000 that oversteps the buffer size of 32000.
done
1280
validation layer: VkDescriptorSet 0x42bf70000000032[] bound as set #0 encountered the following validation error at vkCmdDrawIndexed() time: Dynamic descriptor in binding #2 index 0 uses buffer 9A90CE000000002B with dynamic offset 1280 combined with offset 0 and range 32000 that oversteps the buffer size of 32000.
done
Fix: The range given to the VkDescriptorBufferInfo for the dynamic UBO was the issue. The validation layer checks that offset + range + dynamicOffset stays within the buffer, so a range equal to the full 32000-byte buffer oversteps it for every non-zero dynamic offset. Works out when using dynamicUniformBufferInfo.range = VK_WHOLE_SIZE instead.
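For anyone hitting the same error, the corrected VkDescriptorBufferInfo (a sketch based on the createDescriptorSets() code above) looks like this; setting the range to the per-draw slice (dynamicAlignment) should also satisfy the offset + range + dynamicOffset check:
VkDescriptorBufferInfo dynamicUniformBufferInfo = {};
dynamicUniformBufferInfo.buffer = dynamicUniformBuffers[i];
dynamicUniformBufferInfo.offset = 0;
// a fixed range of 32000 plus any non-zero dynamic offset reads past the end
// of the 32000-byte buffer; VK_WHOLE_SIZE avoids that
dynamicUniformBufferInfo.range = VK_WHOLE_SIZE;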
The obj files I am trying to load have multiple o entries, so there are multiple meshes. I am trying to load them into only one VAO, and I will draw them by first recording each mesh's offset and size. Note that the offset and size are counted in vertices (indices) rather than faces, so they are multiplied by 3: the first mesh starts at offset 0 with size mesh1's mNumFaces * 3, and the second mesh starts at offset mesh1's mNumFaces * 3 with size mesh2's mNumFaces * 3 (see the worked example below). However, it seems only the first mesh is drawn correctly; the rest of the meshes are all distorted somehow.
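As a worked example of that bookkeeping (the face counts here are made up for illustration):
// mesh1: mNumFaces = 100 -> vertexOffset = 0,   size = 100 * 3 = 300
// mesh2: mNumFaces = 250 -> vertexOffset = 300, size = 250 * 3 = 750
// so mesh2 would be drawn with:
// glDrawElements(GL_TRIANGLES, 750, GL_UNSIGNED_INT, (GLvoid*)(sizeof(GLuint) * 300));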
This is my loading logic:
Object* obj = new Object(objName);
// Initialize the meshes in the obj file one by one
std::vector<glm::vec3> vert, norm;
std::vector<glm::vec2> text;
std::vector<glm::ivec3> indices;
int vertexOffset = 0;
std::cout << objName << " numMeshes: " << pScene->mNumMeshes << std::endl;
for (unsigned int i = 0; i < pScene->mNumMeshes; i++) {
std::cout << objName << ": vOffset " << vertexOffset << " numV " << pScene->mMeshes[i]->mNumFaces * 3 << std::endl;
aiMesh* pMesh = pScene->mMeshes[i];
aiVector3D Zero3D(0.0f, 0.0f, 0.0f);
for (unsigned int j = 0; j < pMesh->mNumVertices; j++) {
vert.push_back(glm::vec3(pMesh->mVertices[j].x, pMesh->mVertices[j].y, pMesh->mVertices[j].z));
norm.push_back(glm::vec3(pMesh->mNormals[j].x, pMesh->mNormals[j].y, pMesh->mNormals[j].z));
aiVector3D textCoord = pMesh->HasTextureCoords(0) ? pMesh->mTextureCoords[0][j] : Zero3D;
text.push_back(glm::vec2(textCoord.x, textCoord.y));
}
for (unsigned int j = 0; j < pMesh->mNumFaces; j++) {
aiFace face = pMesh->mFaces[j];
indices.push_back(glm::ivec3(face.mIndices[0], face.mIndices[1], face.mIndices[2]));
}
aiMaterial* mtl = pScene->mMaterials[pMesh->mMaterialIndex];
std::string meshName = std::string(pMesh->mName.C_Str());
Mesh* mesh = new Mesh(meshName, loadMaterial(mtl), vertexOffset, pMesh->mNumFaces * 3);
obj->meshList.push_back(mesh);
vertexOffset = vertexOffset + 3 * pMesh->mNumFaces;
}
//create the obj's node structure
//obj->root = processNode(pScene->mRootNode, obj->meshList);
//send the data to the gpu
GLuint vao;
GLuint vbo[3];
GLuint ebo;
glcheck(glGenVertexArrays(1, &vao));
glcheck(glBindVertexArray(vao));
glcheck(glGenBuffers(3, vbo));
glcheck(glBindBuffer(GL_ARRAY_BUFFER, vbo[0]));
glcheck(glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec3) * vert.size(), vert.data(), GL_STATIC_DRAW));
glcheck(glEnableVertexAttribArray(0));
glcheck(glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0));
glcheck(glBindBuffer(GL_ARRAY_BUFFER, vbo[1]));
glcheck(glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec3) * norm.size(), norm.data(), GL_STATIC_DRAW));
glcheck(glEnableVertexAttribArray(1));
glcheck(glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 0, 0));
glcheck(glBindBuffer(GL_ARRAY_BUFFER, vbo[2]));
glcheck(glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * text.size(), text.data(), GL_STATIC_DRAW));
glcheck(glEnableVertexAttribArray(2));
glcheck(glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 0, 0));
glcheck(glGenBuffers(1, &ebo));
glcheck(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo));
glcheck(glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(glm::ivec3) * indices.size(), indices.data(), GL_STATIC_DRAW));
// Unbind the VBO/VAO
glcheck(glBindVertexArray(0));
//glcheck(glBindBuffer(GL_ARRAY_BUFFER, 0));
//glcheck(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0));
obj->vao = vao; //shared vao variable
objMap[objName] = obj;
objList.push_back(obj);
return obj;
This is my drawing logic:
for (int i = 0; i < instObj->meshList.size(); i++) {
Mesh* mesh = instObj->meshList[i];
glcheck(glDrawElements(GL_TRIANGLES, mesh->size, GL_UNSIGNED_INT, (GLvoid*)(sizeof(GLuint) * mesh->vertexOffset)));
}
This is the first mesh, which is drawn correctly: [screenshot: first mesh]
The second mesh and onward are all messed up, however: [screenshot: second mesh]
[screenshot: the complete mesh]
I want to run TensorFlow on an Android device. So far I have trained a model in Python and exported it to a Protobuf .pb file.
The .pb file was tested in Python and returns no error:
......
graph = load_graph("./frozen_model.pb")
for op in graph.get_operations():
print(op.name)
with tf.Session(graph=graph) as sess:
    tf_predik = graph.get_tensor_by_name("prefix/tf_pred:0")
    tf_data = graph.get_tensor_by_name("prefix/tf_data:0")
    img = np.invert(Image.open("7.png").convert('L')).ravel()
    image = np.array(img).reshape(1, 28, 28, 1)
    fd = {tf_data: image}
    test_pred = sess.run(tf_predik, feed_dict=fd)
    temp = np.argmax(test_pred, axis=1)
    print(temp)
My attempt in Xamarin.Android:
using Org.Tensorflow.Contrib.Android;
.....
var assets = Android.App.Application.Context.Assets;
var inferenceInterface = new TensorFlowInferenceInterface(assets, "frozen_model.pb");
using (Stream inputStream = this.Assets.Open("7.png"))
{
byte[] bytes = inputStream.ReadAllBytes(); // convert to byte array???
inferenceInterface.Feed("tf_data", bytes, bytes.Length);
inferenceInterface.Run(new [] { "tf_pred:0" });
inferenceInterface.Fetch("tf_pred:0", predictions);
....
}
I get an error:
Java.Lang.IllegalArgumentException: Expects arg[0] to be float but uint8 is provided
Thanks in advance.
Expects arg[0] to be float but uint8 is provided
TensorFlowInferenceInterface.Feed expects an array of floats, so you need to take that asset-based image, decode its file encoding (jpg|png|...) into a Bitmap, and obtain the float array from that.
Android Bitmap To Float Array
public float[] AndroidBitmapToFloatArray(Bitmap bitmap)
{
// Assuming a square image to sample|process, adjust based upon your model requirements
const int sizeX = 255;
const int sizeY = 255;
float[] floatArray;
int[] intArray;
using (var sampleImage = Bitmap.CreateScaledBitmap(bitmap, sizeX, sizeY, false).Copy(Bitmap.Config.Argb8888, false))
{
floatArray = new float[sizeX * sizeY * 3];
intArray = new int[sizeX * sizeY];
sampleImage.GetPixels(intArray, 0, sizeX, 0, 0, sizeX, sizeY);
sampleImage.Recycle();
}
for (int i = 0; i < intArray.Length; ++i)
{
// GetPixels returns ARGB-packed ints: (A << 24) | (R << 16) | (G << 8) | B;
// extract B, G, R and subtract a per-channel mean value
var intValue = intArray[i];
floatArray[i * 3 + 0] = ((intValue & 0xFF) - 104);          // blue
floatArray[i * 3 + 1] = (((intValue >> 8) & 0xFF) - 117);   // green
floatArray[i * 3 + 2] = (((intValue >> 16) & 0xFF) - 123);  // red
}
return floatArray;
}
Example:
float[] feedArray;
using (var imageAsset = Assets.Open("someimage"))
using (var bitmappAsset = BitmapFactory.DecodeStream(imageAsset))
{
feedArray = AndroidBitmapToFloatArray(bitmappAsset);
}
inferenceInterface.Feed("tf_data", feedArray, feedArray.Length);
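Note also that the dims passed to Feed describe the tensor shape the graph expects. If the tf_data placeholder takes [1, 28, 28, 1] as in the Python test above, the call would presumably need to be inferenceInterface.Feed("tf_data", feedArray, 1, 28, 28, 1), with the float array resized to a 28x28 single-channel image to match.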
So I'm currently trying to write an OpenCL kernel whose goal is to sum-reduce each row of a matrix (g_idata) into an array (g_odata). The matrix is represented by a float array of length column_count * row_count, and the resulting array has length row_count. I've implemented the following kernel:
#define T float
#define Operation(X, Y) ((X) + (Y))
__kernel void marrow_kernel( __global T *g_odata,__global T *g_idata,
const unsigned long column_count, const unsigned long row_count, __local volatile T* sdata) {
size_t tid = get_local_id(0);
size_t gid = get_global_id(0);
size_t row = gid / column_count;
size_t column = gid % column_count;
if(row < row_count && column < column_count)
{
sdata[tid] = g_idata[gid];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(row < row_count && column < column_count)
{
size_t step = column_count / 2;
size_t limit = column_count;
while(step > 0)
{
if(column + step < limit) {
if(tid + step < get_local_size(0))
{
sdata[tid] = Operation(sdata[tid], sdata[tid + step]);
}
else if (gid + step < column_count * row_count)
{
sdata[tid] = Operation(sdata[tid], g_idata[gid + step]);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
step /= 2;
limit /= 2;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(row < row_count && column == 0)
{
g_odata[row] = column_count % 2 == 0 ? sdata[tid] : sdata[tid] + g_idata[gid + (column_count - 1)];
}
}
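To illustrate the intended behaviour when a whole row fits inside one work-group, take an 8-column row x[0..7] (step runs 4, 2, 1):
// after step 4: sdata[c] = x[c] + x[c+4]          for columns c = 0..3
// after step 2: sdata[c] = sdata[c] + sdata[c+2]  for columns c = 0..1
// after step 1: sdata[0] = sdata[0] + sdata[1],   i.e. the row sum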
Said kernel is currently being instantiated with a work-group of 128 work-units. I currently have no control over the size of the work-group.
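For context, the host-side launch presumably looks something like this (a sketch; the buffer and kernel handles are illustrative, the post does not show the host code):
size_t local = 128;
size_t total = (size_t)(row_count * column_count);
size_t global = ((total + local - 1) / local) * local; // pad to a multiple of 128
clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_odata);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_idata);
clSetKernelArg(kernel, 2, sizeof(cl_ulong), &column_count);
clSetKernelArg(kernel, 3, sizeof(cl_ulong), &row_count);
clSetKernelArg(kernel, 4, local * sizeof(float), NULL); // __local sdata, one T per work-item
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);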
Now here's the issue: if a row is shared between two work-groups, the kernel returns the wrong result, because it falls back to fetching values from g_idata; it's impossible to read another work-group's local memory. After the first iteration those values are stale, and that affects the final result of the operation.
Can anyone give me a hint on how to solve this problem?
I've been working with the Cypress BLE PSoC 4200, and I've set up my GATT database to send int32 data packets to my iPhone. However, you can only write data to the GATT database in uint8 pieces. So I wrote the following to take an int32 voltage reading and put it into a uint8 byte array:
// function passes in int32 variable 'result'
uint8 array[4];
array[0] = result & 0xFF;
array[1] = (result >> 8) & 0xFF;
array[2] = (result >> 16) & 0xFF;
array[3] = (result >> 24) & 0xFF;
So, with that in mind, when that int32 packet gets sent, I want to take each byte and recombine them into the original int32 value and print it to the screen (e.g. 456000 becomes 0.456 V).
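For example, packing result = 456000 (hex 0x0006F540) gives, in little-endian order:
// array[0] = 0x40, array[1] = 0xF5, array[2] = 0x06, array[3] = 0x00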
Right now, I obtain the 4 bytes and handle them like such:
NSData* data = [characteristic value];
const uint8_t *reportData = [data bytes];
// variable to hold the eventual 32-bit data
uint32_t voltage = 0;
Is there a way to go through each index of *reportData and concatenate the bytes? Any help will do, thanks.
Would something like this not work?
uint32_t v0 = (uint32_t)reportData[0];
uint32_t v1 = (uint32_t)reportData[1] << 8;
uint32_t v2 = (uint32_t)reportData[2] << 16;
uint32_t v3 = (uint32_t)reportData[3] << 24;
uint32_t voltage = v0 | v1 | v2 | v3;
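And to get from the recombined raw value to the display format mentioned in the question (this assumes the reading is in microvolts, since 456000 is shown as 0.456 V):
double volts = voltage / 1.0e6; // 456000 microvolts -> 0.456 V
printf("%.3f V\n", volts);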
Is it expected that the following test should fail?
The test compares results of a 2D and a 3D AffineTransformation. Both are constructed to have unit scaling and zero offsets in the y and z direction, but to have non-zero and non-unity scaling and offset in the x direction. All other off-diagonal elements are zero. It is my belief that these transformations are identical in the x and y directions, and hence should produce identical results.
Furthermore I have found that the test passes if I use this Kernel:
using K = CGAL::Exact_predicates_exact_constructions_kernel;
Is it to be expected that the test passes if I use this Kernel? Should the test fail with either kernel or pass with either kernel?
TEST(TransformerTest, testCGALAffine) {
using K = CGAL::Exact_predicates_inexact_constructions_kernel;
using Float = typename K::FT;
using Transformation_2 = K::Aff_transformation_2;
using Transformation_3 = K::Aff_transformation_3;
using Point_2 = typename K::Point_2;
using Point_3 = typename K::Point_3;
double lowerCorner(17.005142946538115);
double upperCorner(91.940521484752139);
int resolution = 48;
double tmpScaleX((upperCorner - lowerCorner) / resolution);
Float scaleX(tmpScaleX);
Float zero(0);
Float unit(1);
// create a 2D voxel to world transform
Transformation_2 transformV2W_2(scaleX, zero, Float(lowerCorner),
zero, unit, zero,
unit);
// create its inverse: a 2D world to voxel transform
auto transformW2V_2 = transformV2W_2.inverse();
// create a 3D voxel to world transform
Transformation_3 transformV2W_3(scaleX, zero, zero, Float(lowerCorner),
zero, unit, zero, zero,
zero, zero, unit, zero,
unit);
// create its inverse: a 3D world to voxel transform
auto transformW2V_3 = transformV2W_3.inverse();
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 2; ++j) {
EXPECT_EQ(transformV2W_2.cartesian(i, j), transformV2W_3.cartesian(i, j)) << i << ", " << j;
EXPECT_EQ(transformW2V_2.cartesian(i, j), transformW2V_3.cartesian(i, j)) << i << ", " << j;
}
}
std::mt19937_64 rng(0);
std::uniform_real_distribution<double> randReal(0, resolution);
// compare the results of 2D and 3D transformations of random locations
for (int i = 0; i < static_cast<int>(1e4); ++i) {
Float x(randReal(rng));
Float y(randReal(rng));
auto world_2 = transformV2W_2(Point_2(x, y));
auto world_3 = transformV2W_3(Point_3(x, y, 0));
EXPECT_EQ(world_2.x(), world_3.x()) << world_2 << ", " << world_3;
auto voxel_2 = transformW2V_2(world_2);
auto voxel_3 = transformW2V_3(world_3);
EXPECT_EQ(voxel_2.x(), voxel_3.x()) << voxel_2 << ", " << voxel_3;
}
}
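For reference, in the x direction both transforms are constructed to encode the same affine map (my notation, read off the constructor arguments above):
// voxel -> world: w = scaleX * v + lowerCorner
// world -> voxel: v = (w - lowerCorner) / scaleX
// y (and z in 3D) are identity in both directions, which is why the test
// expects the 2D and 3D results to agree exactly in x and y.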