How to synchronize image transitions of ray traced image and swapchain image in Vulkan - vulkan

I am very much a Vulkan/graphics API beginner. I've read some resources on Vulkan synchronization and understand it better than I did at the beginning, but the code still doesn't work. I'm expecting the ray tracing pipeline to output a flat blue image, but it flickers intensely between blue and plain black. Validation layers scream every frame that "images passed to present must be in layout VK_IMAGE_LAYOUT_PRESENT_SRC_KHR or VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but is in VK_IMAGE_LAYOUT_UNDEFINED."
This is more or less what my code looks like:
vkBeginCommandBuffer();
// ... bind pipeline and descriptor sets
vkCmdTraceRaysKHR();
// Prepare current swap chain image as transfer destination
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = swapchainImage;
barrier.subresourceRange = subresource_range;
// No need to make anything available
barrier.srcAccessMask = 0;
// The result of this transition should be visible for transfers
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(
cmdBuffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, // No need to wait for anything
VK_PIPELINE_STAGE_TRANSFER_BIT, // Should make transfers wait
0,
0, nullptr,
0, nullptr,
1, &barrier
);
// Prepare ray tracing output image as transfer source
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = renderImage.image;
barrier.subresourceRange = subresource_range;
// The data written by the ray tracing should be made available
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
// The transition and data should be visible for transitions
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(
cmdBuffer,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, // Should wait until ray tracing is done
VK_PIPELINE_STAGE_TRANSFER_BIT, // Should make transfers wait
0,
0, nullptr,
0, nullptr,
1, &barrier
);
vkCmdCopyImage();
// Transition swap chain image back for presentation
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = swapchainImage;
barrier.subresourceRange = subresource_range;
// The effects of the transfer should be made available
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
// The effects of the transfer should be made visible for swapchain presentation
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
vkCmdPipelineBarrier(
cmdBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT, // Wait for transfers
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // Block all commands after this barrier
0,
0, nullptr,
0, nullptr,
1, &barrier
);
// Transition ray tracing output image back to general layout
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = renderImage.image;
barrier.subresourceRange = subresource_range;
// The effects of the transfer should be made available (possibly unnecessary?)
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
// (possibly unnecessary?)
barrier.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
vkCmdPipelineBarrier(
cmdBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT, // Wait for transfers
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // Block all commands after this barrier
0,
0, nullptr,
0, nullptr,
1, &barrier
);
After all this the queue is submitted with two semaphores:
one wait semaphore that is signalled by vkAcquireNextImageKHR. Its wait stage is set to VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR.
one signal semaphore that is later used as a wait semaphore in vkQueuePresentKHR.
So how do I get rid of the vulkan validation layer message and properly display the rendered image?
Edit: The culprit was found somewhere else (choosing the wrong swapchain image for rendering), but I would still appreciate it if someone could confirm/correct my rationale behind the chosen stage and access masks. Especially since now I can't even make it freak out on purpose, for example by setting the semaphore wait stage to BOTTOM_OF_PIPE (I thought it would mean that no stages wait, so the render runs and writes without a swapchain image).

Related

<Vulkan> Use rendered vkImage as Texture

I want to use a VkImage rendered in a previous render pass as a texture for a composite operation in a fragment shader. From here I learned that vkCmdPipelineBarrier is used to wait for the GPU to finish a rendering operation, and I wrote this code. It works well on Snapdragon devices, but not on Mali-G52, where a write-after-write error partly occurs. Is this code not enough? Any suggestions?
vkCmdEndRenderPass(cb);
vkCmdBeginRenderPass(cb, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
VkViewport viewport = vks::initializers::viewport((float)offscreenPass.width, (float)offscreenPass.height, 0.0f, 1.0f);
vkCmdSetViewport(cb, 0, 1, &viewport);
VkRect2D scissor = vks::initializers::rect2D(offscreenPass.width, offscreenPass.height, 0, 0);
vkCmdSetScissor(cb, 0, 1, &scissor);
// https://github.com/KhronosGroup/Vulkan-Samples/blob/master/samples/performance/pipeline_barriers/pipeline_barriers.cpp
VkImageMemoryBarrier imageMemoryBarrier = vks::initializers::imageMemoryBarrier();
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageMemoryBarrier.srcAccessMask = 0;
imageMemoryBarrier.dstAccessMask = 0;
imageMemoryBarrier.image = offscreenPass.color[drawframe].image;
imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
imageMemoryBarrier.subresourceRange.baseMipLevel = 0;
imageMemoryBarrier.subresourceRange.levelCount = 1;
imageMemoryBarrier.subresourceRange.baseArrayLayer = 0;
imageMemoryBarrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(
cb,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
imageMemoryBarrier.image = offscreenPass.depth.image;
imageMemoryBarrier.srcAccessMask = 0;
imageMemoryBarrier.dstAccessMask = 0;
vkCmdPipelineBarrier(
cb,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
I have tried every pattern written here.
If you want to synchronize render passes then your pipeline barrier must be outside of the render pass in the command stream. I.e. it must be after the vkCmdEndRenderPass() of the first pass, and before the vkCmdBeginRenderPass() of the second pass. Pipeline barriers issued inside a render pass, as you are currently doing, are used for synchronization only within the current subpass.
Also, try to avoid:
srcStage=VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
dstStage=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
... for pipeline barriers when you only consume the output of the first pass as a fragment shader input in the second. This is overly conservative and needlessly serializes execution of the geometry processing too. In this case, you should use:
srcStage=VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
dstStage=VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
... which allows the non-dependent vertex shading and binning for the second pass to run in parallel to the first pass.
Self solved.
The difference in the precision of sampler2D between Adreno and Mali causes this issue. I can read correct data using "precision highp sampler2D".

Copying data to a linear tiled image (without a staging buffer) does not seem to work correctly when the image format is VK_FORMAT_R8G8B8_UNORM?

There are two ways to copy data to an image (with or without a staging buffer). The first way, using a staging buffer, works correctly when the image format is VK_FORMAT_R8G8B8A8_UNORM or VK_FORMAT_R8G8B8_UNORM. The second way, without a staging buffer, works well when the image format is VK_FORMAT_R8G8B8A8_UNORM, but after changing the format to VK_FORMAT_R8G8B8_UNORM the sampled result is not correct. The source data is known to be correct for each image format.
The code used is from https://github.com/SaschaWillems/Vulkan/blob/master/examples/texture/texture.cpp:
if (0/*useStaging*/) {
// Copy data to an optimal tiled image
// This loads the texture data into a host local buffer that is copied to the optimal tiled image on the device
// Create a host-visible staging buffer that contains the raw image data
// This buffer will be the data source for copying texture data to the optimal tiled image on the device
VkBuffer stagingBuffer;
VkDeviceMemory stagingMemory;
VkBufferCreateInfo bufferCreateInfo = vks::initializers::bufferCreateInfo();
bufferCreateInfo.size = ktxTextureSize;
// This buffer is used as a transfer source for the buffer copy
bufferCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, nullptr, &stagingBuffer));
// Get memory requirements for the staging buffer (alignment, memory type bits)
vkGetBufferMemoryRequirements(device, stagingBuffer, &memReqs);
memAllocInfo.allocationSize = memReqs.size;
// Get memory type index for a host visible buffer
memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &stagingMemory));
VK_CHECK_RESULT(vkBindBufferMemory(device, stagingBuffer, stagingMemory, 0));
// Copy texture data into host local staging buffer
uint8_t *data;
VK_CHECK_RESULT(vkMapMemory(device, stagingMemory, 0, memReqs.size, 0, (void **)&data));
memcpy(data, ktxTextureData, ktxTextureSize);
vkUnmapMemory(device, stagingMemory);
// Setup buffer copy regions for each mip level
std::vector<VkBufferImageCopy> bufferCopyRegions;
uint32_t offset = 0;
for (uint32_t i = 0; i < texture.mipLevels; i++) {
// Calculate offset into staging buffer for the current mip level
ktx_size_t offset;
KTX_error_code ret = ktxTexture_GetImageOffset(ktxTexture, i, 0, 0, &offset);
assert(ret == KTX_SUCCESS);
// Setup a buffer image copy structure for the current mip level
VkBufferImageCopy bufferCopyRegion = {};
bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bufferCopyRegion.imageSubresource.mipLevel = i;
bufferCopyRegion.imageSubresource.baseArrayLayer = 0;
bufferCopyRegion.imageSubresource.layerCount = 1;
bufferCopyRegion.imageExtent.width = ktxTexture->baseWidth >> i;
bufferCopyRegion.imageExtent.height = ktxTexture->baseHeight >> i;
bufferCopyRegion.imageExtent.depth = 1;
bufferCopyRegion.bufferOffset = offset;
bufferCopyRegions.push_back(bufferCopyRegion);
}
// Create optimal tiled target image on the device
VkImageCreateInfo imageCreateInfo = vks::initializers::imageCreateInfo();
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imageCreateInfo.format = format;
imageCreateInfo.mipLevels = texture.mipLevels;
imageCreateInfo.arrayLayers = 1;
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
// Set initial layout of the image to undefined
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageCreateInfo.extent = { texture.width, texture.height, 1 };
imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, nullptr, &texture.image));
vkGetImageMemoryRequirements(device, texture.image, &memReqs);
memAllocInfo.allocationSize = memReqs.size;
memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &texture.deviceMemory));
VK_CHECK_RESULT(vkBindImageMemory(device, texture.image, texture.deviceMemory, 0));
VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
// Image memory barriers for the texture image
// The sub resource range describes the regions of the image that will be transitioned using the memory barriers below
VkImageSubresourceRange subresourceRange = {};
// Image only contains color data
subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
// Start at first mip level
subresourceRange.baseMipLevel = 0;
// We will transition on all mip levels
subresourceRange.levelCount = texture.mipLevels;
// The 2D texture only has one layer
subresourceRange.layerCount = 1;
// Transition the texture image layout to transfer target, so we can safely copy our buffer data to it.
VkImageMemoryBarrier imageMemoryBarrier = vks::initializers::imageMemoryBarrier();;
imageMemoryBarrier.image = texture.image;
imageMemoryBarrier.subresourceRange = subresourceRange;
imageMemoryBarrier.srcAccessMask = 0;
imageMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
// Insert a memory dependency at the proper pipeline stages that will execute the image layout transition
// Source pipeline stage is host write/read execution (VK_PIPELINE_STAGE_HOST_BIT)
// Destination pipeline stage is copy command execution (VK_PIPELINE_STAGE_TRANSFER_BIT)
vkCmdPipelineBarrier(
copyCmd,
VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imageMemoryBarrier);
// Copy mip levels from staging buffer
vkCmdCopyBufferToImage(
copyCmd,
stagingBuffer,
texture.image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
static_cast<uint32_t>(bufferCopyRegions.size()),
bufferCopyRegions.data());
// Once the data has been uploaded we transfer to the texture image to the shader read layout, so it can be sampled from
imageMemoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
// Insert a memory dependency at the proper pipeline stages that will execute the image layout transition
// Source pipeline stage is copy command execution (VK_PIPELINE_STAGE_TRANSFER_BIT)
// Destination pipeline stage fragment shader access (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)
vkCmdPipelineBarrier(
copyCmd,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imageMemoryBarrier);
// Store current layout for later reuse
texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
vulkanDevice->flushCommandBuffer(copyCmd, queue, true);
// Clean up staging resources
vkFreeMemory(device, stagingMemory, nullptr);
vkDestroyBuffer(device, stagingBuffer, nullptr);
} else {
// Copy data to a linear tiled image
VkImage mappableImage;
VkDeviceMemory mappableMemory;
// Load mip map level 0 to linear tiling image
VkImageCreateInfo imageCreateInfo = vks::initializers::imageCreateInfo();
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imageCreateInfo.format = format;
imageCreateInfo.mipLevels = 1;
imageCreateInfo.arrayLayers = 1;
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR;
imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
imageCreateInfo.extent = { texture.width, texture.height, 1 };
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, nullptr, &mappableImage));
// Get memory requirements for this image like size and alignment
vkGetImageMemoryRequirements(device, mappableImage, &memReqs);
// Set memory allocation size to required memory size
memAllocInfo.allocationSize = memReqs.size;
// Get memory type that can be mapped to host memory
memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &mappableMemory));
VK_CHECK_RESULT(vkBindImageMemory(device, mappableImage, mappableMemory, 0));
// Map image memory
void *data;
VK_CHECK_RESULT(vkMapMemory(device, mappableMemory, 0, memReqs.size, 0, &data));
// Copy image data of the first mip level into memory
memcpy(data, ktxTextureData, memReqs.size);
vkUnmapMemory(device, mappableMemory);
// Linear tiled images don't need to be staged and can be directly used as textures
texture.image = mappableImage;
texture.deviceMemory = mappableMemory;
texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
// Setup image memory barrier transfer image to shader read layout
VkCommandBuffer copyCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
// The sub resource range describes the regions of the image we will be transition
VkImageSubresourceRange subresourceRange = {};
subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subresourceRange.baseMipLevel = 0;
subresourceRange.levelCount = 1;
subresourceRange.layerCount = 1;
// Transition the texture image layout to shader read, so it can be sampled from
VkImageMemoryBarrier imageMemoryBarrier = vks::initializers::imageMemoryBarrier();;
imageMemoryBarrier.image = texture.image;
imageMemoryBarrier.subresourceRange = subresourceRange;
imageMemoryBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
// Insert a memory dependency at the proper pipeline stages that will execute the image layout transition
// Source pipeline stage is host write/read execution (VK_PIPELINE_STAGE_HOST_BIT)
// Destination pipeline stage fragment shader access (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)
vkCmdPipelineBarrier(
copyCmd,
VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imageMemoryBarrier);
vulkanDevice->flushCommandBuffer(copyCmd, queue, true);
}
)

Single usage VkImageMemoryBarrier?

I've been learning vulkan and following vulkan-tutorial and right now I'm at the Texture mapping part. I'm loading an image and uploading it to the host memory, but I'm having trouble understanding the layout transitions and barriers.
Consider this (pseudo)code for loading and transitioning an image (inspired by this), which will be sampled in a fragment shader:
auto texture = loadTexture(filePath);
auto stagingBuffer = createStagingBuffer(texture.pixels, texture.size);
// Create image with:
// usage - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT
// properties - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
auto imageBuffer = createImage();
// -- begin single usage command buffer --
auto cb = beginCommandBuffer();
VkImageMemoryBarrier preCopyBarrier {
// ...
.image = imageBuffer.image,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
// ...
};
// PipelineBarrier (preCopyBarrier):
// srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
// dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT
// imageMemoryBarrier = &preCopyBarrier
vkCmdPipelineBarrier(cb, ...);
// Copy stagingBuffer.buffer to imageBuffer.image
// dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
vkCmdCopyBufferToImage(cb, ...);
VkImageMemoryBarrier postCopyBarrier {
// ...
.image = imageBuffer.image,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
// ...
};
// PipelineBarrier (postCopyBarrier):
// srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT
// dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
// imageMemoryBarrier = &postCopyBarrier
vkCmdPipelineBarrier(cb, ...)
endAndSubmitCommandBuffer(cb);
The preCopyBarrier is there because of the vkCmdCopyBufferToImage(...) command and will be "used"/"activated" only once and that is during this command(?).
The postCopyBarrier is there because of the fact, that it will be sampled in the fragment shader, so the layout transition
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL -> VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
has to happen every single time a frame is rendered(? Please correct me, if I'm wrong).
But (assuming I'm correct, which I'm probably not) I'm having trouble wrapping my head around the fact that I'm creating a preCopyBarrier, which will be used only once, and a postCopyBarrier, which will be used continuously. If I were to load, for example, 200 textures, I'd have a bunch of their single-usage preCopyBarriers lying around. Isn't this a...waste?
This might be a stupid question and I'm probably missing/misunderstanding something important, but I feel like I shouldn't move on without understanding this concept correctly.

vulkan: VkImageMemoryBarrier

I don't quite understand this:
https://github.com/SaschaWillems/Vulkan/blob/master/examples/computeshader/computeshader.cpp
// Per-frame entry point quoted from the compute shader example: submits the draw
// command buffer first and the compute command buffer afterwards.
void draw()
{
// NOTE(review): prepareFrame()/submitFrame() are base-class helpers not shown here;
// presumably they acquire and present the swapchain image — confirm in VulkanExampleBase.
VulkanExampleBase::prepareFrame();
// Submit the one draw command buffer selected by currentBuffer; no fence is attached.
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VulkanExampleBase::submitFrame();
// Submit compute commands
// Use a fence to ensure that compute command buffer has finished executing before using it again
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &compute.fence);
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
// compute.fence is passed here, so the wait above blocks on this submission the next time draw() runs.
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
}
drawCmdBuffers[currentBuffer] runs before compute.commandBuffer, but the consumer drawCmdBuffers[currentBuffer] requires the textureComputeTarget produced by the producer compute.commandBuffer.
I don't understand why drawCmdBuffers[currentBuffer] is called before compute.commandBuffer.
In the following code, only the first frame is rendered, while the right picture does not get the textureComputeTarget, so it is rendered with a blue background.
// Asker's experiment: same submission order as the example (draw first, compute second),
// with a sleep appended after the compute submission.
void draw()
{
// NOTE(review): prepareFrame()/submitFrame() are base-class helpers not shown here;
// presumably they acquire and present the swapchain image — confirm in VulkanExampleBase.
VulkanExampleBase::prepareFrame();
// Submit the one draw command buffer selected by currentBuffer; no fence is attached.
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VulkanExampleBase::submitFrame();
// Submit compute commands
// Use a fence to ensure that compute command buffer has finished executing before using it again
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &compute.fence);
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
// compute.fence is passed here, so the wait above blocks on this submission the next time draw() runs.
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
// Delay inserted by the asker after both submissions (kept exactly as posted).
sleep(1000) // <-------- Wait
}
Executed when calling vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE):
VkImageMemoryBarrier imageMemoryBarrier = {};
imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
// We won't be changing the layout of the image
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
imageMemoryBarrier.image = textureComputeTarget.image;
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(
drawCmdBuffers[i],
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_FLAGS_NONE,
0, nullptr,
0, nullptr,
1, &imageMemoryBarrier);
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
Wait for VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, but this phase has not been executed before, why is the pipeline not stuck? Is it because
there is no pipeline before, so there is no need to wait?
In section 6.6 Pipeline Barriers
vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
// Asker's second experiment: prints the graphics and compute queue handles to check
// whether they are the same queue, then submits draw and compute work as before.
void draw()
{
// Print both queue handles so they can be compared.
printf("%p, %p\n", queue, compute.queue);
// NOTE(review): prepareFrame()/submitFrame() are base-class helpers not shown here;
// presumably they acquire and present the swapchain image — confirm in VulkanExampleBase.
VulkanExampleBase::prepareFrame();
// Submit the one draw command buffer selected by currentBuffer; no fence is attached.
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VulkanExampleBase::submitFrame();
// Submit compute commands
// Use a fence to ensure that compute command buffer has finished executing before using it again
vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &compute.fence);
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
computeSubmitInfo.commandBufferCount = 1;
computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
// compute.fence is passed here, so the wait above blocks on this submission the next time draw() runs.
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
// Delay inserted by the asker after both submissions.
sleep(1000);
}
Print results:
0x6000039c4a20, 0x6000039c4a20
The current queue and compute.queue are the same queue. But it is possible that the above code may end up with different queues.
Can VkImageMemoryBarrier be synchronized in multiple queues?
vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or
between commands in the same subpass. why use "or", why not use
"and"?
I don't understand why drawCmdBuffers[currentBuffer] is called before compute.commandBuffer.
Dunno, it is an example. The author was probably not awfully worried about what happens in the first frame. It would simply be drawn with one frame of delay. Swapping the compute before the draw should also work with some effort.
Wait for VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, but this phase has not been executed before, why is the pipeline not stuck? Is it because there is no pipeline before, so there is no need to wait?
Because that is not how pipeline and dependencies work. vkCmdPipelineBarrier makes sure any command\operation in queue before the barrier reaches (and finishes) at least the srcStage stage (i.e. COMPUTE) before any command\op recorded after it reach dstStage.
Such dependency is satisfied even if there are no commands recorded before. I.e. by definition of "nothing", there are no commands that have not reached COMPUTE stage yet.
Can VkImageMemoryBarrier be synchronized in multiple queues?
Yes, with the help of a Semaphore.
For VK_SHARING_MODE_EXCLUSIVE and different queue family it is called Queue Family Ownership Transfer (QFOT).
Otherwise, a Semaphore already performs a memory dependency and a VkImageMemoryBarrier is not needed.
vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass. why use "or", why not use "and"?
vkCmdPipelineBarrier is either outside subpass, then it forms a dependency with commands recorded before and after in the queue.
Or vkCmdPipelineBarrier is inside a subpass, in which case it is called "subpass self-dependency" and its scope is limited only to that subpass (among other restrictions).

Vulkan Device - Host - Device synchronization with VkEvent

I'm trying to synchronize a host stage into my pipeline, where I basically edit some data on the host during the execution of a command buffer on the device. From reading the specification I think I'm doing the correct synchronization, execution/memory dependencies and availability/visibility operations, but it neither works on NV nor AMD hardware. Is this even possible? If so, what am I doing wrong in terms of synchronization?
In summary I'm doing the following:
[D] A device buffer (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) is copied to a host visible and coherent one (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT).
[D] The first event is set.
[D] The second event is waited for.
[H] Meanwhile the host waits for the first event.
[H] After it has been set, it increments the numbers in the host visible buffer.
[H] Then it sets the second event.
[D] The device then continues to copy the host visible buffer back to the device local buffer.
What happens?
On NV the first part works, the correct data arrives at the host side, but the altered data never arrives at the device side. On AMD not even the first part works and I already don't get the data on the host.
Command buffer recording:
// ...
VkMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.srcAccessMask = ...;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(command_buffer, ..., VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr);
copyWholeBuffer(command_buffer, host_buffer, device_buffer);
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr);
vkCmdSetEvent(command_buffer, device_to_host_sync_event, VK_PIPELINE_STAGE_TRANSFER_BIT);
barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
vkCmdWaitEvents(command_buffer, 1, &host_to_device_sync_event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 1, &barrier, 0, nullptr, 0, nullptr);
copyWholeBuffer(command_buffer, device_buffer, host_buffer);
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = ...;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, ..., 0, 1, &barrier, 0, nullptr, 0, nullptr);
// ...
Execution
vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE);
while(vkGetEventStatus(device, device_to_host_sync_event) != VK_EVENT_SET)
std::this_thread::sleep_for(std::chrono::microseconds(10));
void* data;
vkMapMemory(device, host_buffer, 0, BUFFER_SIZE, 0, &data);
// read and write parts of the memory
vkUnmapMemory(device, host_buffer);
vkSetEvent(device, host_to_device_sync_event);
vkDeviceWaitIdle(device);
I've uploaded a working example: https://gist.github.com/neXyon/859b2e52bac9a5a56b804d8a9d5fa4a5
The interesting bits start at line 292! Please have a look if it works for you?
I opened an issue on github: https://github.com/KhronosGroup/Vulkan-Docs/issues/755
After a bit of discussion there, the conclusion is that Device to Host synchronization is not possible with an event and a fence has to be used.