iVar Shown as Private Global in LLVM IR - objective-c

I have declared an iVar in a class:
@implementation LLVMIRTest {
    NSString* ivarTest;
}
When I check the LLVM IR, it shows me:
@OBJC_METH_VAR_NAME_ = private global [9 x i8] c"ivarTest\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@OBJC_METH_VAR_TYPE_ = private global [12 x i8] c"@\22NSString\22\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
I have to ask why it is marked private global in the LLVM IR. Why not only private?
This is the full module LLVM IR:
; ModuleID = 'LLVMIRTest.m'
source_filename = "LLVMIRTest.m"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
%struct._objc_cache = type opaque
%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
%struct._objc_method = type { i8*, i8*, i8* }
%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8**, i8*, %struct._prop_list_t* }
%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
%struct._prop_t = type { i8*, i8* }
@_objc_empty_cache = external global %struct._objc_cache
@"OBJC_METACLASS_$_NSObject" = external global %struct._class_t
@OBJC_CLASS_NAME_ = private global [11 x i8] c"LLVMIRTest\00", section "__TEXT,__objc_classname,cstring_literals", align 1
@"\01l_OBJC_METACLASS_RO_$_LLVMIRTest" = private global %struct._class_ro_t { i32 1, i32 40, i32 40, i8* null, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
@"OBJC_METACLASS_$_LLVMIRTest" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** null, %struct._class_ro_t* @"\01l_OBJC_METACLASS_RO_$_LLVMIRTest" }, section "__DATA, __objc_data", align 8
@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
@"OBJC_IVAR_$_LLVMIRTest.ivarTest" = hidden global i64 8, section "__DATA, __objc_ivar", align 8
@OBJC_METH_VAR_NAME_ = private global [9 x i8] c"ivarTest\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@OBJC_METH_VAR_TYPE_ = private global [12 x i8] c"@\22NSString\22\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
@"\01l_OBJC_$_INSTANCE_VARIABLES_LLVMIRTest" = private global { i32, i32, [1 x %struct._ivar_t] } { i32 32, i32 1, [1 x %struct._ivar_t] [%struct._ivar_t { i64* @"OBJC_IVAR_$_LLVMIRTest.ivarTest", i8* getelementptr inbounds ([9 x i8], [9 x i8]* @OBJC_METH_VAR_NAME_, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @OBJC_METH_VAR_TYPE_, i32 0, i32 0), i32 3, i32 8 }] }, section "__DATA, __objc_const", align 8
@"\01l_OBJC_CLASS_RO_$_LLVMIRTest" = private global %struct._class_ro_t { i32 0, i32 8, i32 16, i8* null, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_LLVMIRTest" to %struct._ivar_list_t*), i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
@"OBJC_CLASS_$_LLVMIRTest" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_LLVMIRTest", %struct._class_t* @"OBJC_CLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** null, %struct._class_ro_t* @"\01l_OBJC_CLASS_RO_$_LLVMIRTest" }, section "__DATA, __objc_data", align 8
@"OBJC_LABEL_CLASS_$" = private global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_LLVMIRTest" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8
@llvm.compiler.used = appending global [5 x i8*] [i8* getelementptr inbounds ([11 x i8], [11 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @OBJC_METH_VAR_NAME_, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @OBJC_METH_VAR_TYPE_, i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_LLVMIRTest" to i8*), i8* bitcast ([1 x i8*]* @"OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
!llvm.ident = !{!6}
!0 = !{i32 1, !"Objective-C Version", i32 2}
!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
!4 = !{i32 1, !"Objective-C Class Properties", i32 64}
!5 = !{i32 1, !"PIC Level", i32 2}
!6 = !{!"Apple LLVM version 8.0.0 (clang-800.0.38)"}

Global variables are module-scoped and are initialized at compile time rather than at runtime.
Most front ends to LLVM put strings at module scope, and those require a global or constant tag. There is a way to store strings on the stack instead, but that is a rare occurrence in my experience. I typically create strings as
@somename = internal constant ...
The private linkage instructs LLVM not to expose the symbol outside of the module.
I assume this is how Objective-C adds type and instrumentation support for classes. If you look at the rest of the output, it is likely that the pointers to those strings are passed into the runtime library (e.g. %x = load ...) prior to some call.
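For instance, the emitted code typically does something along these lines (a sketch only; @some_runtime_fn is a hypothetical stand-in for whatever runtime entry point is actually called):
%name_ptr = getelementptr inbounds [9 x i8], [9 x i8]* @OBJC_METH_VAR_NAME_, i32 0, i32 0  ; pointer to the "ivarTest" string
%result = call i8* @some_runtime_fn(i8* %name_ptr)                                         ; string pointer handed to the runtime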
Update after OP added listing
What you've listed are the 'static' module-level constructs representing your class. If you look closely, the class declaration consists of a number of embedded structures and strings. Because these declarations are at the module level, they are declared global, which puts them, as you would expect, in the data segment: it is data after all, not method implementation. Think of your class as a structure containing not only space for your variable ivarTest but also additional information necessary for the Objective-C runtime.
So classes and their variables are module-level declarations, represented as data structures at the module level, which LLVM marks as global. See the LLVM documentation on global variables.
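To make the distinction concrete, here is a minimal sketch with hypothetical symbol names. Every module-scope definition needs the global (or constant) keyword; private only controls whether the symbol is visible outside the module at link time:
@exported_str = constant [6 x i8] c"hello\00"        ; external linkage (the default): other modules can reference it
@local_str = private constant [6 x i8] c"hello\00"   ; module-local: the symbol is not exported
Both end up in a data section either way. "Only private" is not valid IR, because the global/constant keyword is what marks the definition as a module-level variable in the first place.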

Related

CoreML: Failed in 2nd reshape after missing custom layer info

In our very big TensorFlow (2.x) model there is a part where we use InceptionResNetV2 and create a multi-pooled model from it:
model_base = InceptionResNetV2(weights = 'imagenet',
                               include_top = False,
                               input_shape = input_shape)
ImgResizer = Lambda(lambda x: tf.image.resize(x, pool_size, method='area'),
                    name='feature_resizer')
feature_layers = [l for l in model_base.layers if 'mixed' in l.name]
feature_layers = [feature_layers[i] for i in indexes]
pools = [ImgResizer(l.output) for l in feature_layers]
conc_pools = Concatenate(name='conc_pools', axis=3)(pools)
model = Model(inputs = model_base.input, outputs = conc_pools)
Here we use a Lambda function to resize a 4D tensor ([batch, height, width, channels]) to a new 4D tensor, with pool_size=(5, 5), input_shape=(None, None, 3) and indexes=list(range(43)). Unfortunately, this operation (ResizeArea) is not supported in the current version of coremltools (5.0), so I have to write a custom operation wrapper and then implement it in Swift.
@register_op(doc_str='Custom ResizeArea Layer', is_custom_op=True)
class custom_resize_area(Operation):
    input_spec = InputSpec(
        x = TensorInputType(),
        s = ScalarOrTensorInputType()
    )
    bindings = { 'class_name'  : 'CustomResizeArea',
                 'input_order' : ['x', 's'],
                 'parameters'  : [],
                 'description' : "Resize area custom layer"
    }

    def __init__(self, **kwargs):
        super(custom_resize_area, self).__init__(**kwargs)

    def type_inference(self):
        x_type = self.x.dtype
        x_shape = self.x.shape
        s = list(self.s.val)
        ret_shape = list(x_shape)
        ret_shape[1] = s[0]
        ret_shape[2] = s[1]
        #print(x_shape, ret_shape)
        return types.tensor(x_type, ret_shape)

# Override ResizeArea op with override=True flag
@register_tf_op(tf_alias=['ResizeArea'], override=True)
def CustomResizeArea(context, node):
    #input: "model_2/mixed_5b/concat"
    #input: "model_2/lambda/resize/size"
    x = context[node.inputs[0]]
    s = context[node.inputs[1]]
    x = mb.custom_resize_area(x=x, s=s, name=node.name)
    context.add(node.name, x)
The conversion was successful, and now I have started to implement this custom layer in Swift. Here is a part of my code:
@objc(CustomResizeArea) class CustomResizeArea: NSObject, MLCustomLayer {
    required init(parameters: [String : Any]) throws {
        super.init()
    }

    func setWeightData(_ weights: [Data]) throws {}

    func outputShapes(forInputShapes inputShapes: [[NSNumber]]) throws
        -> [[NSNumber]] {
        print(#function, inputShapes)
        let outputShape = inputShapes[0]
        // [ sequence, batch, channel, height, width ] - why?
        if outputShape.count == 5 {
            print(outputShape)
            return [outputShape]
        }
        print([outputShape[0], 5, 5, outputShape[3]])
        return [[outputShape[0], 5, 5, outputShape[3]]]
    }
    ...
}
I am publishing only the outputShapes function, since CoreML gives me an error before the evaluate function is actually executed. Here are the last logs (with some output shape parameters inside):
outputShapes(forInputShapes:) [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
[0, 0, 0, 0, 0]
outputShapes(forInputShapes:) [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
[0, 0, 0, 0, 0]
.....
outputShapes(forInputShapes:) [[1, 0, 0, 448], [2]]
[1, 5, 5, 448]
outputShapes(forInputShapes:) [[1, 0, 0, 448], [2]]
[1, 5, 5, 448]
outputShapes(forInputShapes:) [[1, 0, 0, 448], [2]]
[1, 5, 5, 448]
2021-11-06 10:15:55.085694+0100 snafu[11263:4717917] [espresso] [Espresso::handle_ex_plan] exception=Failed in 2nd reshape after missing custom layer info.
2021-11-06 10:15:55.086108+0100 snafu[11263:4717917] [coreml] Error in adding network -1.
2021-11-06 10:15:55.086477+0100 snafu[11263:4717917] [coreml] MLModelAsset: load failed with error Error Domain=com.apple.CoreML Code=0 "Error in declaring network." UserInfo={NSLocalizedDescription=Error in declaring network.}
I have no idea why I get [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] as an input, and why there are zeros in, for example, [[1, 0, 0, 448], [2]]. Actually, my model should support any image size on input; could that be why? I think I have tried everything...
P.S.:
I also tried to replace the area resize method with bilinear in the Python code (Lambda), because there is a BilinearResize converter in coremltools. But I cannot convert it, because I get the following error:
File "/home/alex/anaconda3/envs/tfgpu/lib/python3.8/site-packages/coremltools/converters/mil/mil/builder.py", line 75, in _add_const
raise ValueError("Cannot add const {}".format(val))
ValueError: Cannot add const 5.0001/is57
I suppose that 5.0001 here is my pool_size, but I don't understand why it is incorrect.

How can I add separators between different records in a bincoded file?

I have the following struct:
struct Employee {
    id: u64,
    name: String,
}
I am serializing it with the following code and then writing the serialized byte array to a file:
let emp = Employee {
    id: 1546,
    name: "abcd".to_string(),
};
let mut file = OpenOptions::new()
    .read(true)
    .write(true)
    .create(true)
    .open("hello.txt")
    .unwrap();
let initial_buf = &bincode::serialize(&emp).unwrap();
println!("Initial Buf: {:?}", initial_buf);
file.write(&initial_buf);
file.write(&[b'\n']);
file.flush();
file.seek(SeekFrom::Start(0)).unwrap();
let mut final_buf: Vec<u8> = Vec::new();
let mut reader = BufReader::new(file);
reader.read_until(b'\n', &mut final_buf).unwrap();
println!("Final Buf: {:?}", final_buf);
I get the following output:
Initial Buf: [10, 6, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 97, 98, 99, 100]
Final Buf: [10]
Bincode's contract is that you give it a value to serialize and it gives you back bytes. The contract does not guarantee that the bytes you get back cannot contain a newline.
In your data, the integer 1546 is 0x60A, which as a little-endian u64 is the bytes [10, 6, 0, 0, 0, 0, 0, 0]. The very first byte is 10, the same value as b'\n', so read_until(b'\n') stops right after it.
You should be able to work with Bincode data without any separators at all. The bincode::deserialize_from function will know where to stop reading.
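A minimal sketch of that separator-free approach (assuming bincode 1.x with serde derive; the values here are illustrative): each call to bincode::deserialize_from consumes exactly one record and leaves the reader positioned at the start of the next.
use serde::{Deserialize, Serialize};
use std::io::Cursor;

#[derive(Serialize, Deserialize, Debug)]
struct Employee {
    id: u64,
    name: String,
}

fn main() {
    // Write two records back-to-back; no separator byte is needed.
    let mut buf: Vec<u8> = Vec::new();
    buf.extend(bincode::serialize(&Employee { id: 1546, name: "abcd".to_string() }).unwrap());
    buf.extend(bincode::serialize(&Employee { id: 7, name: "efgh".to_string() }).unwrap());

    // Read them back one at a time; bincode knows where each record ends.
    let mut reader = Cursor::new(buf);
    let first: Employee = bincode::deserialize_from(&mut reader).unwrap();
    let second: Employee = bincode::deserialize_from(&mut reader).unwrap();
    println!("{:?} / {:?}", first, second);
}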

Rendering a cube in Vulkan vs OpenGL

I wrote a simple OpenGL program which merely renders a cube from an angle. It's as simple as you can get: vertex buffer only (no index buffer), a vertex shader which only multiplies the vertices by an MVP matrix from a uniform buffer, and a static fragment shader which just returns red. More recently, I have tried writing this same program in Vulkan, but I have run into some issues.
I started by following the Intel API without Secrets tutorial to set up a simple 2D texture rendering program, but when I took the leap into 3D, I started having issues. In order to debug this, I simplified the program to match my older OpenGL program (removed texturing and some other extra stuff I did in Vulkan), and even went as far as to use the exact same vertex and MVP data. However, I just can't get the cube to render correctly in Vulkan.
I am aware that OpenGL coordinates do not map directly to Vulkan coordinates, as the Y coordinate is flipped, but if anything that should just flip the image upside down, and I already tried switching the Y values in the MVP. I feel like there is some other detail I am missing here with coordinates, but I just can't figure it out searching around and looking at guides about converting OpenGL code bases to Vulkan.
I'm including the data I am uploading to the shaders, and some of the core code from the Vulkan code base. The Vulkan code is in D, so it's similar to C++, but a little different. With the library I'm using for wrapping Vulkan (erupted), the device-level functions are loaded into a device dispatch (accessed as device.dispatch in the code), and when they are called on the dispatch without the vk prefix, the device and command buffer arguments (which are assigned to the dispatch in code) are auto-populated.
Vertex Data:
[ [1, 1, 1, 1],
[1, 1, -1, 1],
[-1, 1, -1, 1],
[1, 1, 1, 1],
[-1, 1, -1, 1],
[-1, 1, 1, 1],
[1, 1, 1, 1],
[1, -1, 1, 1],
[1, -1, -1, 1],
[1, 1, 1, 1],
[1, -1, -1, 1],
[1, 1, -1, 1],
[1, 1, -1, 1],
[1, -1, -1, 1],
[-1, -1, -1, 1],
[1, 1, -1, 1],
[-1, -1, -1, 1],
[-1, 1, -1, 1],
[-1, 1, -1, 1],
[-1, -1, -1, 1],
[-1, -1, 1, 1],
[-1, 1, -1, 1],
[-1, -1, 1, 1],
[-1, 1, 1, 1],
[-1, 1, 1, 1],
[-1, -1, 1, 1],
[1, -1, 1, 1],
[-1, 1, 1, 1],
[1, -1, 1, 1],
[1, 1, 1, 1],
[1, -1, 1, 1],
[1, -1, -1, 1],
[-1, -1, -1, 1],
[1, -1, 1, 1],
[-1, -1, -1, 1],
[-1, -1, 1, 1] ]
MVP:
[ [-1.0864, -0.993682, -0.687368, -0.685994],
[0, 2.07017, 0.515526, -0.514496],
[-1.44853, 0.745262, 0.515526, 0.514496],
[-8.04095e-16, 0, 5.64243, 5.83095] ]
Graphics Pipeline Setup:
VkPipelineShaderStageCreateInfo[] shader_stage_infos = [
    {
        stage: VK_SHADER_STAGE_VERTEX_BIT,
        _module: vertex_shader,
        pName: "main"
    },
    {
        stage: VK_SHADER_STAGE_FRAGMENT_BIT,
        _module: fragment_shader,
        pName: "main"
    }
];
VkVertexInputBindingDescription[] vertex_binding_descriptions = [
    {
        binding: 0,
        stride: VertexData.sizeof,
        inputRate: VK_VERTEX_INPUT_RATE_VERTEX
    }
];
VkVertexInputAttributeDescription[] vertex_attribute_descriptions = [
    {
        location: 0,
        binding: vertex_binding_descriptions[0].binding,
        format: VK_FORMAT_R32G32B32A32_SFLOAT,
        offset: VertexData.x.offsetof
    },
    {
        location: 1,
        binding: vertex_binding_descriptions[0].binding,
        format: VK_FORMAT_R32G32_SFLOAT,
        offset: VertexData.u.offsetof
    }
];
VkPipelineVertexInputStateCreateInfo vertex_input_state_info = {
    vertexBindingDescriptionCount: vertex_binding_descriptions.length.to!uint,
    pVertexBindingDescriptions: vertex_binding_descriptions.ptr,
    vertexAttributeDescriptionCount: vertex_attribute_descriptions.length.to!uint,
    pVertexAttributeDescriptions: vertex_attribute_descriptions.ptr
};
VkPipelineInputAssemblyStateCreateInfo input_assembly_state_info = {
    topology: VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    primitiveRestartEnable: VK_FALSE
};
VkPipelineViewportStateCreateInfo viewport_state_info = {
    viewportCount: 1,
    pViewports: null,
    scissorCount: 1,
    pScissors: null
};
VkPipelineRasterizationStateCreateInfo rasterization_state_info = {
    depthBiasClamp: 0.0,
    polygonMode: VK_POLYGON_MODE_FILL,
    cullMode: VK_CULL_MODE_FRONT_AND_BACK,
    frontFace: VK_FRONT_FACE_COUNTER_CLOCKWISE,
    lineWidth: 1
};
VkPipelineMultisampleStateCreateInfo multisample_state_info = {
    rasterizationSamples: VK_SAMPLE_COUNT_1_BIT,
    minSampleShading: 1
};
VkPipelineColorBlendAttachmentState[] color_blend_attachment_states = [
    {
        blendEnable: VK_FALSE,
        srcColorBlendFactor: VK_BLEND_FACTOR_ONE,
        dstColorBlendFactor: VK_BLEND_FACTOR_ZERO,
        colorBlendOp: VK_BLEND_OP_ADD,
        srcAlphaBlendFactor: VK_BLEND_FACTOR_ONE,
        dstAlphaBlendFactor: VK_BLEND_FACTOR_ZERO,
        alphaBlendOp: VK_BLEND_OP_ADD,
        colorWriteMask:
            VK_COLOR_COMPONENT_R_BIT |
            VK_COLOR_COMPONENT_G_BIT |
            VK_COLOR_COMPONENT_B_BIT |
            VK_COLOR_COMPONENT_A_BIT
    }
];
VkPipelineColorBlendStateCreateInfo color_blend_state_info = {
    logicOpEnable: VK_FALSE,
    logicOp: VK_LOGIC_OP_COPY,
    attachmentCount: color_blend_attachment_states.length.to!uint,
    pAttachments: color_blend_attachment_states.ptr,
    blendConstants: [ 0, 0, 0, 0 ]
};
VkDynamicState[] dynamic_states = [
    VK_DYNAMIC_STATE_VIEWPORT,
    VK_DYNAMIC_STATE_SCISSOR
];
VkPipelineDynamicStateCreateInfo dynamic_state_info = {
    dynamicStateCount: dynamic_states.length.to!uint,
    pDynamicStates: dynamic_states.ptr
};
VkGraphicsPipelineCreateInfo pipeline_info = {
    stageCount: shader_stage_infos.length.to!uint,
    pStages: shader_stage_infos.ptr,
    pVertexInputState: &vertex_input_state_info,
    pInputAssemblyState: &input_assembly_state_info,
    pTessellationState: null,
    pViewportState: &viewport_state_info,
    pRasterizationState: &rasterization_state_info,
    pMultisampleState: &multisample_state_info,
    pDepthStencilState: null,
    pColorBlendState: &color_blend_state_info,
    pDynamicState: &dynamic_state_info,
    layout: pipeline_layout,
    renderPass: render_pass,
    subpass: 0,
    basePipelineHandle: VK_NULL_HANDLE,
    basePipelineIndex: -1
};
VkPipeline[1] pipelines;
checkVk(device.dispatch.CreateGraphicsPipelines(VK_NULL_HANDLE, 1, [pipeline_info].ptr, pipelines.ptr));
pipeline = pipelines[0];
Drawing:
if(device.dispatch.WaitForFences(1, [fence].ptr, VK_FALSE, 1000000000) != VK_SUCCESS)
    throw new StringException("timed out waiting for fence");
device.dispatch.ResetFences(1, [fence].ptr);
uint image_index;
switch(device.dispatch.AcquireNextImageKHR(swapchain.swapchain, uint64_t.max, image_available_semaphore, VK_NULL_HANDLE, &image_index)) {
    case VK_SUCCESS:
    case VK_SUBOPTIMAL_KHR:
        break;
    case VK_ERROR_OUT_OF_DATE_KHR:
        on_window_size_changed();
        break;
    default:
        throw new StringException("unhandled vk result on swapchain image acquisition");
}
if(framebuffer != VK_NULL_HANDLE) device.dispatch.DestroyFramebuffer(framebuffer);
VkFramebufferCreateInfo framebuffer_info = {
    renderPass: swapchain.render_pass,
    attachmentCount: 1,
    pAttachments: [swapchain.image_resources[image_index].image_view].ptr,
    width: swapchain.extent.width,
    height: swapchain.extent.height,
    layers: 1
};
checkVk(device.dispatch.CreateFramebuffer(&framebuffer_info, &framebuffer));
VkCommandBufferBeginInfo cmd_begin_info = { flags: VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
VkImageSubresourceRange image_subresource_range = {
    aspectMask: VK_IMAGE_ASPECT_COLOR_BIT,
    baseMipLevel: 0,
    levelCount: 1,
    baseArrayLayer: 0,
    layerCount: 1,
};
VkImageMemoryBarrier barrier_from_present_to_draw = {
    srcAccessMask: VK_ACCESS_MEMORY_READ_BIT,
    dstAccessMask: VK_ACCESS_MEMORY_READ_BIT,
    oldLayout: VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    newLayout: VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    srcQueueFamilyIndex: device.present_queue.family_index,
    dstQueueFamilyIndex: device.graphics_queue.family_index,
    image: swapchain.image_resources[image_index].image,
    subresourceRange: image_subresource_range
};
VkImageMemoryBarrier barrier_from_draw_to_present = {
    srcAccessMask: VK_ACCESS_MEMORY_READ_BIT,
    dstAccessMask: VK_ACCESS_MEMORY_READ_BIT,
    oldLayout: VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    newLayout: VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    srcQueueFamilyIndex: device.graphics_queue.family_index,
    dstQueueFamilyIndex: device.present_queue.family_index,
    image: swapchain.image_resources[image_index].image,
    subresourceRange: image_subresource_range
};
VkViewport viewport = {
    x: 0,
    y: 0,
    width: swapchain.extent.width,
    height: swapchain.extent.height,
    minDepth: 0,
    maxDepth: 1
};
VkRect2D scissor = {
    offset: {
        x: 0,
        y: 0
    },
    extent: swapchain.extent
};
VkClearValue[] clear_values = [
    { color: { [ 1.0, 0.8, 0.4, 0.0 ] } }
];
VkRenderPassBeginInfo render_pass_begin_info = {
    renderPass: swapchain.render_pass,
    framebuffer: framebuffer,
    renderArea: {
        offset: {
            x: 0,
            y: 0
        },
        extent: swapchain.extent
    },
    clearValueCount: clear_values.length.to!uint,
    pClearValues: clear_values.ptr
};
device.dispatch.commandBuffer = command_buffer;
device.dispatch.BeginCommandBuffer(&cmd_begin_info);
if(device.graphics_queue.handle != device.present_queue.handle)
    device.dispatch.CmdPipelineBarrier(
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        0, 0, null, 0, null, 1,
        &barrier_from_present_to_draw
    );
device.dispatch.CmdBeginRenderPass(&render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);
device.dispatch.CmdBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, swapchain.pipeline);
device.dispatch.CmdSetViewport(0, 1, &viewport);
device.dispatch.CmdSetScissor(0, 1, &scissor);
const(ulong) vertex_buffer_offset = 0;
device.dispatch.CmdBindVertexBuffers(0, 1, &vertex_buffer, &vertex_buffer_offset);
device.dispatch.CmdBindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_set, 0, null);
device.dispatch.CmdDraw(draw_count, 1, 0, 0);
device.dispatch.CmdEndRenderPass();
if(device.graphics_queue.handle != device.present_queue.handle)
    device.dispatch.CmdPipelineBarrier(
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        0, 0, null, 0, null, 1,
        &barrier_from_draw_to_present
    );
checkVk(device.dispatch.EndCommandBuffer());
device.dispatch.commandBuffer = VK_NULL_HANDLE;
VkSubmitInfo submit_info = {
    waitSemaphoreCount: 1,
    pWaitSemaphores: [image_available_semaphore].ptr,
    pWaitDstStageMask: castFrom!(VkPipelineStageFlagBits*).to!(const(uint)*)([VK_PIPELINE_STAGE_TRANSFER_BIT].ptr),
    commandBufferCount: 1,
    pCommandBuffers: [command_buffer].ptr,
    signalSemaphoreCount: 1,
    pSignalSemaphores: [rendering_finished_semaphore].ptr
};
checkVk(device.dispatch.vkQueueSubmit(device.graphics_queue.handle, 1, [submit_info].ptr, fence));
VkPresentInfoKHR present_info = {
    waitSemaphoreCount: 1,
    pWaitSemaphores: [rendering_finished_semaphore].ptr,
    swapchainCount: 1,
    pSwapchains: [swapchain.swapchain].ptr,
    pImageIndices: [image_index].ptr
};
switch(device.dispatch.vkQueuePresentKHR(device.present_queue.handle, &present_info)) {
    case VK_SUCCESS:
        break;
    case VK_ERROR_OUT_OF_DATE_KHR:
    case VK_SUBOPTIMAL_KHR:
        on_window_size_changed();
        break;
    default:
        throw new StringException("unhandled vk result on presentation");
}
(I can't embed the images because my rep is too low, sorry)
Program Outputs:
OpenGL draws the cube as expected
OpenGL Output
Vulkan does not render anything except for the clear color.
UPDATE:
After fixing the cull mode by changing it to VK_CULL_MODE_NONE, this is the result I get:
Output after cull mode fix
VK_CULL_MODE_FRONT_AND_BACK
I think this is your problem :)
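As a sketch of the fix (same struct-literal style as the pipeline setup above; VK_CULL_MODE_FRONT_AND_BACK discards every triangle before rasterization, so nothing but the clear color can appear):
VkPipelineRasterizationStateCreateInfo rasterization_state_info = {
    depthBiasClamp: 0.0,
    polygonMode: VK_POLYGON_MODE_FILL,
    // cull nothing while debugging; switch to VK_CULL_MODE_BACK_BIT once the
    // winding order of the cube's triangles is confirmed
    cullMode: VK_CULL_MODE_NONE,
    frontFace: VK_FRONT_FACE_COUNTER_CLOCKWISE,
    lineWidth: 1
};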
After the cull mode fix, it seems that your problem is in your vertex data layout. Vulkan expects (according to your layout bindings) something like
struct Vertex {
    vec4 x;
    vec2 u;
};
Vertex VertexData[] = {...};
because you set VK_VERTEX_INPUT_RATE_VERTEX in your vertex_binding_descriptions.inputRate field.
And it seems that in your case you should set VK_VERTEX_INPUT_RATE_INSTANCE instead to work with buffers after each other.
Fix: Having seen your new comment, it looks like I misunderstood your vertex layout, so this won't help.

llvm code optimization options do not work

I am reading about LLVM code optimization. I tried to apply opt command options on a number of examples, but they do not have any effect. For example, here is a C++ file called deadCode.cpp:
#include <stdio.h>

int square(int x){
    return x*x;
}

int main(){
    int a=2;
    int b=3;
    int c=4;
    int result = square(a);
    printf("%d\n",b);
}
I generated the LLVM IR with clang like this:
clang -emit-llvm -S deadCode.cpp -o deadCodeBefore
and the content of the result file deadCodeBefore is:
; ModuleID = 'deadCode.cpp'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; Function Attrs: nounwind uwtable
define i32 @_Z6squarei(i32 %x) #0 {
  %1 = alloca i32, align 4
  store i32 %x, i32* %1, align 4
  %2 = load i32, i32* %1, align 4
  %3 = load i32, i32* %1, align 4
  %4 = mul nsw i32 %2, %3
  ret i32 %4
}
; Function Attrs: norecurse uwtable
define i32 @main() #1 {
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  %result = alloca i32, align 4
  store i32 2, i32* %a, align 4
  store i32 3, i32* %b, align 4
  store i32 4, i32* %c, align 4
  %1 = load i32, i32* %a, align 4
  %2 = call i32 @_Z6squarei(i32 %1)
  store i32 %2, i32* %result, align 4
  %3 = load i32, i32* %b, align 4
  %4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %3)
  ret i32 0
}
declare i32 @printf(i8*, ...) #2
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
The optimization command I used:
opt -S -adce deadCodeBefore -o deadCodeAfter1
From what I read, it should remove the call to the square function and also the declaration of the c variable, because they have no effect. But the result is the same. Here is deadCodeAfter1, which is identical to deadCodeBefore:
; ModuleID = 'deadCodeBefore'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; Function Attrs: nounwind uwtable
define i32 @_Z6squarei(i32 %x) #0 {
  %1 = alloca i32, align 4
  store i32 %x, i32* %1, align 4
  %2 = load i32, i32* %1, align 4
  %3 = load i32, i32* %1, align 4
  %4 = mul nsw i32 %2, %3
  ret i32 %4
}
; Function Attrs: norecurse uwtable
define i32 @main() #1 {
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  %result = alloca i32, align 4
  store i32 2, i32* %a, align 4
  store i32 3, i32* %b, align 4
  store i32 4, i32* %c, align 4
  %1 = load i32, i32* %a, align 4
  %2 = call i32 @_Z6squarei(i32 %1)
  store i32 %2, i32* %result, align 4
  %3 = load i32, i32* %b, align 4
  %4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %3)
  ret i32 0
}
declare i32 @printf(i8*, ...) #2
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
Because it's doing exactly what it is supposed to do. It checks in the IR whether an instruction is used by some other instruction; only if it is not does it remove it. For example, in your code the declaration of variable %a (%a = alloca i32, align 4) is used by the store instruction store i32 2, i32* %a, align 4.
If you had just declared a variable and not assigned any value to it, then the adce pass would have eliminated it. You can see that by declaring a variable like int e; and running the optimization on it.
Usually, passes in LLVM depend on the output of some other pass in order to be effective. An individual pass by itself might not give you the result you expected it to provide.
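As a concrete illustration (exact results depend on your LLVM version), try running mem2reg before adce. mem2reg promotes the allocas to SSA registers, which already makes the never-read variables %c and %result disappear, and adce can then delete any remaining dead instructions:
opt -S -mem2reg -adce deadCodeBefore -o deadCodeAfter2
Even then, the call to @_Z6squarei survives, because adce cannot assume an arbitrary call is free of side effects; it takes inlining (e.g. clang -O1/-O2, or opt -O2) to remove it entirely.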

Accumulator not reset between 2 consecutive calls to R.reduce in a R.pipe

Considering this code, using Ramda 0.21.0:
var iteratee = (acc, [k, v]) => {
    acc[k] = ++v;
    return acc
}
var foo = R.pipe(
    R.toPairs,
    R.reduce(iteratee, {})
)
console.log(foo({ a: 1, b: 2})) // { a: 2, b: 3 }
console.log(foo({ c: 3, d: 4})) // { a: 2, b: 3, c: 4, d: 5 }
Why does the second call to foo display { a: 2, b: 3, c: 4, d: 5 } instead of { c: 4, d: 5 }?
Is there some kind of memoization going on? I would expect the initial value of acc to be reset to {} each time foo is applied.
This answer mostly expands on the comments by @iofjuupasli.
The problem is the mutation of the accumulator object. You create one in the definition of foo, which is reused on every call, and then you update it in iteratee (a horrible name, IMHO. Call it bar or something. :-) ). There are several ways you could fix this. One might be to make sure that you pass a new accumulator on each call to foo:
var iteratee = (acc, [k, v]) => {
    acc[k] = ++v;
    return acc
}
var foo = R.pipe(
    R.toPairs,
    list => R.reduce(iteratee, {}, list)
)
foo({ a: 1, b: 2}); //=> {"a": 2, "b": 3}
foo({ c: 3, d: 4}); //=> {"c": 4, "d": 5}
This works, but feels unsatisfying. Perhaps more helpful would be to avoid mutating the accumulator object on each pass. assoc will create a new object that reuses as much of the previous one as possible:
var iteratee = (acc, [k, v]) => R.assoc(k, v + 1, acc)
var foo = R.pipe(
    R.toPairs,
    R.reduce(iteratee, {})
);
foo({ a: 1, b: 2}); //=> {"a": 2, "b": 3}
foo({ c: 3, d: 4}); //=> {"c": 4, "d": 5}
This seems cleaner. But in fact Ramda has a much simpler solution. The map function treats objects as functors to be mapped over. Combining this with inc, which simply increments a value, we can just do this:
var foo = R.map(R.inc);
foo({ a: 1, b: 2}); //=> {"a": 2, "b": 3}
foo({ c: 3, d: 4}); //=> {"c": 4, "d": 5}
And that feels really clean!