I'm trying to generate MLIR using TensorFlow (2.2.0) as a front end, and I would like to clarify the following.
Consider the example below, which implements a direct matrix multiplication of two 2x2 matrices.
import tensorflow as tf
import tensorflow.mlir as mlir
with tf.Graph().as_default() as g:
    with tf.device('/cpu:0'):
        @tf.function
        def mymatmul(A, B, C):
            for i in range(2):
                for j in range(2):
                    cij = 0.0
                    for k in range(2):
                        cij += A[i, k]*B[i, j]  # note: B[k, j] for a true matmul; the index slip doesn't affect the loop-structure question
                    C[i, j].assign(cij)

        A = tf.constant([[1., 2.], [3., 4.]])
        B = tf.constant([[2., 1.], [4., 3.]])
        C = tf.Variable([[0., 0.], [0., 0.]])
        mymatmul(A, B, C)

tf_mlir_graph = mlir.experimental.convert_graph_def(g.as_graph_def())
print(tf_mlir_graph)
This code emits the following MLIR.
module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 175 : i32}} {
func @main() {
%0 = "tf.Const"() {value = dense<0.000000e+00> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%1 = "tf.Const"() {value = dense<[[2.000000e+00, 1.000000e+00], [4.000000e+00, 3.000000e+00]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%2 = "tf.Const"() {value = dense<[[1.000000e+00, 2.000000e+00], [3.000000e+00, 4.000000e+00]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%3 = "tf.VarHandleOp"() {_class = ["loc:#Variable"], container = "", device = "/device:CPU:0", dtype = f32, shape = "tfshape$dim { size: 2 } dim { size: 2 }", shared_name = "Variable"} : () -> tensor<!tf.resource<tensor<2x2xf32>>>
"tf.StatefulPartitionedCall"(%2, %1, %3) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_RESOURCE"], Tout = [], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01", device = "/device:CPU:0", executor_type = "", f = #__inference_mymatmul_1160} : (tensor<2x2xf32>, tensor<2x2xf32>, tensor<!tf.resource<tensor<2x2xf32>>>) -> ()
%4 = "tf.VarIsInitializedOp"(%3) {device = "/device:CPU:0"} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<i1>
%5 = "tf.ReadVariableOp"(%3) {device = "/device:CPU:0", dtype = f32} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<2x2xf32>
"tf.AssignVariableOp"(%3, %0) {device = "/device:CPU:0", dtype = f32} : (tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2x2xf32>) -> ()
return
}
func @__inference_mymatmul_1160(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>, %arg2: tensor<!tf.resource>) attributes {tf.signature.is_stateful} {
%0 = "tf.Const"() {value = dense<1> : tensor<2xi32>} : () -> tensor<2xi32>
%1 = "tf.Const"() {value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
%2 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi32>} : () -> tensor<2xi32>
%3 = "tf.Const"() {value = dense<0> : tensor<2xi32>} : () -> tensor<2xi32>
%4 = "tf.Const"() {value = dense<2> : tensor<2xi32>} : () -> tensor<2xi32>
%5 = "tf.Const"() {value = dense<[2, 1]> : tensor<2xi32>} : () -> tensor<2xi32>
%6 = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32>
%7 = "tf.Const"() {value = dense<0.000000e+00> : tensor<f32>} : () -> tensor<f32>
%8 = "tf.ReadVariableOp"(%arg2) {device = "", dtype = f32} : (tensor<!tf.resource>) -> tensor<*xf32>
%9 = "tf.StridedSlice"(%arg0, %3, %0, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%10 = "tf.StridedSlice"(%arg1, %3, %0, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%11 = "tf.Mul"(%9, %10) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%12 = "tf.AddV2"(%11, %7) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%13 = "tf.StridedSlice"(%arg0, %6, %5, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%14 = "tf.StridedSlice"(%arg1, %6, %5, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%15 = "tf.Mul"(%13, %14) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%16 = "tf.AddV2"(%15, %7) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%17 = "tf.StridedSlice"(%arg0, %0, %4, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%18 = "tf.Mul"(%17, %14) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%19 = "tf.AddV2"(%16, %18) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%20 = "tf.StridedSlice"(%arg1, %0, %4, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%21 = "tf.Mul"(%13, %20) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%22 = "tf.AddV2"(%21, %7) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%23 = "tf.Mul"(%17, %20) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%24 = "tf.AddV2"(%22, %23) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%25 = "tf.StridedSlice"(%arg0, %2, %1, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%26 = "tf.Mul"(%25, %10) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%27 = "tf.AddV2"(%12, %26) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tf.ResourceStridedSliceAssign"(%arg2, %3, %0, %0, %27) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<!tf.resource>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<f32>) -> ()
%28 = "tf.ReadVariableOp"(%arg2) {device = "", dtype = f32} : (tensor<!tf.resource>) -> tensor<*xf32>
%29 = "tf.StridedSlice"(%arg1, %2, %1, %0) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%30 = "tf.Mul"(%9, %29) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%31 = "tf.AddV2"(%30, %7) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%32 = "tf.Mul"(%25, %29) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%33 = "tf.AddV2"(%31, %32) {T = f32, device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tf.ResourceStridedSliceAssign"(%arg2, %2, %1, %0, %33) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<!tf.resource>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<f32>) -> ()
%34 = "tf.ReadVariableOp"(%arg2) {device = "", dtype = f32} : (tensor<!tf.resource>) -> tensor<*xf32>
"tf.ResourceStridedSliceAssign"(%arg2, %6, %5, %0, %19) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<!tf.resource>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<f32>) -> ()
%35 = "tf.ReadVariableOp"(%arg2) {device = "", dtype = f32} : (tensor<!tf.resource>) -> tensor<*xf32>
"tf.ResourceStridedSliceAssign"(%arg2, %0, %4, %0, %24) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<!tf.resource>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<f32>) -> ()
return
}
}
What is interesting, at least for my purposes, is the loss of the computation's loop structure. In the tf dialect the loops have been completely flattened (unrolled), but I would like the output MLIR to preserve the loop structure expressed in the original program.
I suppose another way of phrasing this question is to ask whether the TensorFlow dialect supports control-flow constructs (I believe it does, via tf.IfOp and tf.WhileOp), and whether there are any particular syntactic restrictions the input should adhere to in order to retain the loop structure.
What would be the best way to go about this?
P.S. I suspect this might have something to do with eager execution, which is the default behavior in TF >= 2.0 (a quick check is sketched below). Can someone verify this?
Thanks,
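Edit: a quick way to check the eager-execution suspicion from the P.S.; a minimal sketch (tf.executing_eagerly is a standard TF 2.x call):
import tensorflow as tf

print(tf.executing_eagerly())        # True: eager is the default in TF >= 2.0
with tf.Graph().as_default():
    print(tf.executing_eagerly())    # False: code traced under an explicit Graph runs in graph mode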
Modifying the input computation as follows did the trick. I believe the problem was, at least in part, the use of plain Python variables alongside TF tensors; with tensor-valued loop bounds, the following effectively preserves the symbolic structure of the computation.
with tf.Graph().as_default() as g:
    with tf.device('/cpu:0'):
        @tf.function
        def mymatmul(A, B, C, m, n):
            for i in range(m):
                for j in range(m):
                    for k in range(n):
                        C[i, j].assign(tf.math.add(C[i, j], tf.math.multiply(A[i, k], B[k, j])))
            return C

        A = tf.constant([[1., 2.], [3., 4.]])
        B = tf.constant([[2., 1.], [4., 3.]])
        C = tf.Variable(tf.zeros((2, 2), dtype=tf.float32))
        m = tf.constant(2)
        n = tf.constant(2)
        mymatmul(A, B, C, m, n)
This generates the following MLIR with tf.While.
module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 412 : i32}} {
func @main() {
%0 = "tf.Const"() {value = dense<[[1.000000e+00, 2.000000e+00], [3.000000e+00, 4.000000e+00]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%1 = "tf.Const"() {value = dense<[[2.000000e+00, 1.000000e+00], [4.000000e+00, 3.000000e+00]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%2 = "tf.Const"() {value = dense<2> : tensor<i32>} : () -> tensor<i32>
%3 = "tf.Const"() {value = dense<0.000000e+00> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%4 = "tf.VarHandleOp"() {_class = ["loc:#Variable"], allowed_devices = [], container = "", device = "/device:CPU:0", shared_name = "Variable"} : () -> tensor<!tf.resource<tensor<2x2xf32>>>
%5 = "tf.StatefulPartitionedCall"(%0, %1, %4, %2, %2) {_collective_manager_ids = [], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01", device = "/device:CPU:0", executor_type = "", f = #__inference_mymatmul_3650} : (tensor<2x2xf32>, tensor<2x2xf32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<i32>) -> tensor<2x2xf32>
%6 = "tf.VarIsInitializedOp"(%4) {device = "/device:CPU:0"} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<i1>
%7 = "tf.ReadVariableOp"(%4) {device = "/device:CPU:0"} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<2x2xf32>
"tf.AssignVariableOp"(%4, %3) {device = "/device:CPU:0"} : (tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2x2xf32>) -> ()
return
}
func @__inference_mymatmul_3650(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>, %arg2: tensor<!tf.resource<tensor<2x2xf32>>>, %arg3: tensor<i32>, %arg4: tensor<i32>) -> tensor<2x2xf32> attributes {tf.signature.is_stateful} {
%0 = "tf.Const"() {value = dense<[[1.000000e+00, 2.000000e+00], [3.000000e+00, 4.000000e+00]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%1 = "tf.Const"() {value = dense<[[2.000000e+00, 1.000000e+00], [4.000000e+00, 3.000000e+00]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
%2 = "tf.Const"() {value = dense<1> : tensor<i32>} : () -> tensor<i32>
%3 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%4 = "tf.Const"() {value = dense<2> : tensor<i32>} : () -> tensor<i32>
%5:10 = "tf.While"(%3, %4, %3, %2, %4, %4, %4, %arg2, %0, %1) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = #while_body_1410, cond = #while_cond_1400, device = "", is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<2x2>, #tf.shape<2x2>], parallel_iterations = 10 : i64} : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2x2xf32>, tensor<2x2xf32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2x2xf32>, tensor<2x2xf32>)
%6 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<2x2xf32>
%7 = "tf.Identity"(%6) {device = ""} : (tensor<2x2xf32>) -> tensor<2x2xf32>
return %7 : tensor<2x2xf32>
}
func @while_body_1410(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<!tf.resource<tensor<2x2xf32>>>, %arg8: tensor<2x2xf32>, %arg9: tensor<2x2xf32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2x2xf32>, tensor<2x2xf32>) attributes {tf.signature.is_stateful} {
%0 = "tf.Const"() {value = dense<1> : tensor<i32>} : () -> tensor<i32>
%1 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%2 = "tf.Maximum"(%arg5, %1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%3 = "tf.FloorDiv"(%2, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%4 = "tf.FloorMod"(%2, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%5 = "tf.AddV2"(%arg2, %arg3) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%6 = "tf.AddV2"(%arg0, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%7 = "tf.NotEqual"(%4, %1) {device = "", incompatible_shape_error = true} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%8 = "tf.Cast"(%7) {Truncate = false, device = ""} : (tensor<i1>) -> tensor<i32>
%9 = "tf.AddV2"(%3, %8) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%10 = "tf.Maximum"(%9, %1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%11:10 = "tf.While"(%1, %10, %1, %0, %2, %arg6, %arg7, %arg2, %arg8, %arg9) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = #while_body_1830, cond = #while_cond_1820, device = "", is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<2x2>, #tf.shape<2x2>], parallel_iterations = 10 : i64} : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>)
%12 = "tf.Identity"(%6) {device = ""} : (tensor<i32>) -> tensor<i32>
%13 = "tf.Identity"(%arg1) {device = ""} : (tensor<i32>) -> tensor<i32>
%14 = "tf.Identity"(%5) {device = ""} : (tensor<i32>) -> tensor<i32>
return %12, %13, %14, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9 : tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2x2xf32>, tensor<2x2xf32>
}
func @while_body_1830(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<!tf.resource<tensor<2x2xf32>>>, %arg7: tensor<i32>, %arg8: tensor<2x2xf32>, %arg9: tensor<2x2xf32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>) attributes {tf.signature.is_stateful} {
%0 = "tf.Const"() {value = dense<1> : tensor<i32>} : () -> tensor<i32>
%1 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%2 = "tf.Maximum"(%arg5, %1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%3 = "tf.FloorDiv"(%2, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%4 = "tf.FloorMod"(%2, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%5 = "tf.AddV2"(%arg2, %arg3) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%6 = "tf.AddV2"(%arg0, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%7 = "tf.NotEqual"(%4, %1) {device = "", incompatible_shape_error = true} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%8 = "tf.Cast"(%7) {Truncate = false, device = ""} : (tensor<i1>) -> tensor<i32>
%9 = "tf.AddV2"(%3, %8) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%10 = "tf.Maximum"(%9, %1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%11:10 = "tf.While"(%1, %10, %1, %0, %2, %arg6, %arg7, %arg2, %arg8, %arg9) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = #while_body_2250, cond = #while_cond_2240, device = "", is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<2x2>, #tf.shape<2x2>], parallel_iterations = 10 : i64} : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>)
%12 = "tf.Identity"(%6) {device = ""} : (tensor<i32>) -> tensor<i32>
%13 = "tf.Identity"(%arg1) {device = ""} : (tensor<i32>) -> tensor<i32>
%14 = "tf.Identity"(%5) {device = ""} : (tensor<i32>) -> tensor<i32>
return %12, %13, %14, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9 : tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>
}
func @while_body_2250(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<i32>, %arg5: tensor<!tf.resource<tensor<2x2xf32>>>, %arg6: tensor<i32>, %arg7: tensor<i32>, %arg8: tensor<2x2xf32>, %arg9: tensor<2x2xf32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>) attributes {tf.signature.is_stateful} {
%0 = "tf.Const"() {value = dense<1> : tensor<i32>} : () -> tensor<i32>
%1 = "tf.Const"() {value = dense<1> : tensor<2xi32>} : () -> tensor<2xi32>
%2 = "tf.AddV2"(%arg7, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%3 = "tf.AddV2"(%arg6, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%4 = "tf.Pack"(%3, %2) {axis = 0 : i64, device = ""} : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
%5 = "tf.Pack"(%arg6, %arg7) {axis = 0 : i64, device = ""} : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
%6 = "tf.AddV2"(%arg2, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%7 = "tf.Pack"(%3, %6) {axis = 0 : i64, device = ""} : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
%8 = "tf.Pack"(%6, %2) {axis = 0 : i64, device = ""} : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
%9 = "tf.Pack"(%arg6, %arg2) {axis = 0 : i64, device = ""} : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
%10 = "tf.Pack"(%arg2, %arg7) {axis = 0 : i64, device = ""} : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
%11 = "tf.AddV2"(%arg2, %arg3) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%12 = "tf.ReadVariableOp"(%arg5) {device = ""} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<2x2xf32>
%13 = "tf.ReadVariableOp"(%arg5) {device = ""} : (tensor<!tf.resource<tensor<2x2xf32>>>) -> tensor<2x2xf32>
%14 = "tf.StridedSlice"(%13, %5, %4, %1) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%15 = "tf.StridedSlice"(%arg8, %9, %7, %1) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%16 = "tf.StridedSlice"(%arg9, %10, %8, %1) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<2x2xf32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<f32>
%17 = "tf.Mul"(%15, %16) {device = ""} : (tensor<f32>, tensor<f32>) -> tensor<f32>
%18 = "tf.AddV2"(%14, %17) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tf.ResourceStridedSliceAssign"(%arg5, %5, %4, %1, %18) {Index = i32, T = f32, begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 3 : i64} : (tensor<!tf.resource<tensor<2x2xf32>>>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<f32>) -> ()
%19 = "tf.Identity"(%arg1) {device = ""} : (tensor<i32>) -> tensor<i32>
%20 = "tf.Identity"(%11) {device = ""} : (tensor<i32>) -> tensor<i32>
%21 = "tf.AddV2"(%arg0, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%22 = "tf.Identity"(%21) {device = ""} : (tensor<i32>) -> tensor<i32>
return %22, %19, %20, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9 : tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<i32>, tensor<!tf.resource<tensor<2x2xf32>>>, tensor<i32>, tensor<i32>, tensor<2x2xf32>, tensor<2x2xf32>
}
func @while_cond_2240(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<i32>, %arg5: tensor<!tf.resource<tensor<2x2xf32>>>, %arg6: tensor<i32>, %arg7: tensor<i32>, %arg8: tensor<2x2xf32>, %arg9: tensor<2x2xf32>) -> tensor<i1> {
%0 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%1 = "tf.GreaterEqual"(%arg3, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%2 = "tf.Less"(%arg3, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%3 = "tf.Greater"(%arg2, %arg4) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%4 = "tf.LogicalAnd"(%2, %3) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%5 = "tf.Less"(%arg2, %arg4) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%6 = "tf.LogicalAnd"(%1, %5) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%7 = "tf.LogicalOr"(%6, %4) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%8 = "tf.Less"(%arg0, %arg1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%9 = "tf.LogicalAnd"(%8, %7) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%10 = "tf.Identity"(%9) {device = ""} : (tensor<i1>) -> tensor<i1>
return %10 : tensor<i1>
}
func @while_cond_1820(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<!tf.resource<tensor<2x2xf32>>>, %arg7: tensor<i32>, %arg8: tensor<2x2xf32>, %arg9: tensor<2x2xf32>) -> tensor<i1> {
%0 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%1 = "tf.GreaterEqual"(%arg3, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%2 = "tf.Less"(%arg3, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%3 = "tf.Greater"(%arg2, %arg4) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%4 = "tf.LogicalAnd"(%2, %3) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%5 = "tf.Less"(%arg2, %arg4) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%6 = "tf.LogicalAnd"(%1, %5) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%7 = "tf.LogicalOr"(%6, %4) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%8 = "tf.Less"(%arg0, %arg1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%9 = "tf.LogicalAnd"(%8, %7) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%10 = "tf.Identity"(%9) {device = ""} : (tensor<i1>) -> tensor<i1>
return %10 : tensor<i1>
}
func @while_cond_1400(%arg0: tensor<i32>, %arg1: tensor<i32>, %arg2: tensor<i32>, %arg3: tensor<i32>, %arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<!tf.resource<tensor<2x2xf32>>>, %arg8: tensor<2x2xf32>, %arg9: tensor<2x2xf32>) -> tensor<i1> {
%0 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
%1 = "tf.GreaterEqual"(%arg3, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%2 = "tf.Less"(%arg3, %0) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%3 = "tf.Greater"(%arg2, %arg4) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%4 = "tf.LogicalAnd"(%2, %3) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%5 = "tf.Less"(%arg2, %arg4) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%6 = "tf.LogicalAnd"(%1, %5) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%7 = "tf.LogicalOr"(%6, %4) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%8 = "tf.Less"(%arg0, %arg1) {device = ""} : (tensor<i32>, tensor<i32>) -> tensor<i1>
%9 = "tf.LogicalAnd"(%8, %7) {device = ""} : (tensor<i1>, tensor<i1>) -> tensor<i1>
%10 = "tf.Identity"(%9) {device = ""} : (tensor<i1>) -> tensor<i1>
return %10 : tensor<i1>
}
}
This is still more verbose and dense than I would like (and a for loop is my preferred construct whenever possible), but that will have to be addressed during dialect conversion and is a different problem altogether.
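For anyone who lands on the same problem, here is a minimal sketch of the rule at play as I understand it: AutoGraph emits a symbolic tf.while_loop (which surfaces as tf.While in the tf dialect) only when the loop bound is a tensor; a loop over a plain Python int is unrolled away at trace time. The function names below are illustrative; tf.function, get_concrete_function and as_graph_def are standard TF 2.x APIs.
import tensorflow as tf

@tf.function
def unrolled(x):
    # Python int bound: AutoGraph unrolls this into two AddV2 ops at trace time.
    for _ in range(2):
        x = x + 1
    return x

@tf.function
def symbolic(x, n):
    # Tensor bound: AutoGraph emits a single symbolic while loop instead.
    for _ in tf.range(n):
        x = x + 1
    return x

# The first traced graph contains no While node; the second does.
print(unrolled.get_concrete_function(tf.constant(0)).graph.as_graph_def())
print(symbolic.get_concrete_function(tf.constant(0), tf.constant(2)).graph.as_graph_def())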
I have declared an ivar in a class:
@implementation LLVMIRTest {
    NSString* ivarTest;
}
When I check the LLVM IR, it shows me:
@OBJC_METH_VAR_NAME_ = private global [9 x i8] c"ivarTest\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@OBJC_METH_VAR_TYPE_ = private global [12 x i8] c"@\22NSString\22\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
I have to ask why it is marked "private global" in the LLVM IR. Why not just "private"?
This is the full module LLVM IR:
; ModuleID = 'LLVMIRTest.m'
source_filename = "LLVMIRTest.m"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
%struct._objc_cache = type opaque
%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
%struct._objc_method = type { i8*, i8*, i8* }
%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8**, i8*, %struct._prop_list_t* }
%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
%struct._prop_t = type { i8*, i8* }
@_objc_empty_cache = external global %struct._objc_cache
@"OBJC_METACLASS_$_NSObject" = external global %struct._class_t
@OBJC_CLASS_NAME_ = private global [11 x i8] c"LLVMIRTest\00", section "__TEXT,__objc_classname,cstring_literals", align 1
@"\01l_OBJC_METACLASS_RO_$_LLVMIRTest" = private global %struct._class_ro_t { i32 1, i32 40, i32 40, i8* null, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
@"OBJC_METACLASS_$_LLVMIRTest" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** null, %struct._class_ro_t* @"\01l_OBJC_METACLASS_RO_$_LLVMIRTest" }, section "__DATA, __objc_data", align 8
@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
@"OBJC_IVAR_$_LLVMIRTest.ivarTest" = hidden global i64 8, section "__DATA, __objc_ivar", align 8
@OBJC_METH_VAR_NAME_ = private global [9 x i8] c"ivarTest\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@OBJC_METH_VAR_TYPE_ = private global [12 x i8] c"@\22NSString\22\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
@"\01l_OBJC_$_INSTANCE_VARIABLES_LLVMIRTest" = private global { i32, i32, [1 x %struct._ivar_t] } { i32 32, i32 1, [1 x %struct._ivar_t] [%struct._ivar_t { i64* @"OBJC_IVAR_$_LLVMIRTest.ivarTest", i8* getelementptr inbounds ([9 x i8], [9 x i8]* @OBJC_METH_VAR_NAME_, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @OBJC_METH_VAR_TYPE_, i32 0, i32 0), i32 3, i32 8 }] }, section "__DATA, __objc_const", align 8
@"\01l_OBJC_CLASS_RO_$_LLVMIRTest" = private global %struct._class_ro_t { i32 0, i32 8, i32 16, i8* null, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_LLVMIRTest" to %struct._ivar_list_t*), i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
@"OBJC_CLASS_$_LLVMIRTest" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_LLVMIRTest", %struct._class_t* @"OBJC_CLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** null, %struct._class_ro_t* @"\01l_OBJC_CLASS_RO_$_LLVMIRTest" }, section "__DATA, __objc_data", align 8
@"OBJC_LABEL_CLASS_$" = private global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_LLVMIRTest" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8
@llvm.compiler.used = appending global [5 x i8*] [i8* getelementptr inbounds ([11 x i8], [11 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @OBJC_METH_VAR_NAME_, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @OBJC_METH_VAR_TYPE_, i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_LLVMIRTest" to i8*), i8* bitcast ([1 x i8*]* @"OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
!llvm.ident = !{!6}
!0 = !{i32 1, !"Objective-C Version", i32 2}
!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
!4 = !{i32 1, !"Objective-C Class Properties", i32 64}
!5 = !{i32 1, !"PIC Level", i32 2}
!6 = !{!"Apple LLVM version 8.0.0 (clang-800.0.38)"}
Global variables are module-scoped and are initialized at compile time rather than at runtime.
Most front ends to LLVM put strings at module scope, and those require a global or constant tag. There is a way to store strings on the stack instead, but that is rare in my experience. I typically create strings as
@somename = internal constant ...
The private linkage instructs LLVM not to expose the symbol outside of the module.
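To make that concrete, here is a small sketch of the common combinations (illustrative names, not taken from the module above):
@exported_str = global [6 x i8] c"hello\00"             ; module scope, symbol visible to other modules
@private_str = private global [6 x i8] c"hello\00"      ; module scope, symbol not exported
@private_cstr = private constant [6 x i8] c"hello\00"   ; not exported, and marked read-only
All three are module-scoped and initialized at compile time; private only controls whether the symbol is visible outside the emitted object.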
I assume this is how Objective-C adds type and instrumentation support for classes. If you look at the rest of the output, it is likely that pointers to those strings are passed into the runtime library (e.g. via some %x = load ... preceding a call).
Update after OP added the listing
What you've listed are the 'static' module-level constructs representing your class. If you look closely, the class declaration consists of a number of embedded structures and strings. Because these declarations are at module level they are declared global, which puts them, as you would expect, in the data segment: it is data, after all, not method implementation. Think of your class as a structure containing not only space for your ivar 'ivarTest' but also the additional information the Objective-C runtime needs.
So classes and their variables are module-level declarations, represented as module-level data structures and marked global per the LLVM rules. See the LLVM documentation on global variables.
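If you want to see where this metadata lands, here is a sketch of regenerating the listing and inspecting the sections (assuming a macOS toolchain; these are standard clang and otool invocations):
clang -S -emit-llvm LLVMIRTest.m -o LLVMIRTest.ll    # regenerate the textual IR above
clang -c LLVMIRTest.m -o LLVMIRTest.o                # compile to a Mach-O object file
otool -l LLVMIRTest.o | grep sectname                # shows __objc_data, __objc_const, __objc_classname, ...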