Not understanding the data flow in U-Net-like architectures and having problems with the output of the Conv2DTranspose layers - tensorflow

I have a problem or two with the input dimensions of a modified U-Net architecture. To save your time and to better understand/reproduce my results, I'll post the code and the output dimensions. The modified U-Net architecture is the MultiResUNet architecture from https://github.com/nibtehaz/MultiResUNet/blob/master/MultiResUNet.py and is based on this paper: https://arxiv.org/abs/1902.04049. Please don't be put off by the length of this code. You can simply copy-paste it, and it shouldn't take longer than 10 seconds to reproduce my results. You also don't need a dataset for this. Tested with TF v1.9 and Keras v2.2.0.
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate, BatchNormalization, Activation, add
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu


def conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(1, 1), activation='relu', name=None):
    """2D convolutional layer followed by batch normalization.

    Arguments:
        x {keras layer} -- input layer
        filters {int} -- number of filters
        num_row {int} -- number of rows in filters
        num_col {int} -- number of columns in filters

    Keyword Arguments:
        padding {str} -- mode of padding (default: {'same'})
        strides {tuple} -- stride of convolution operation (default: {(1, 1)})
        activation {str} -- activation function (default: {'relu'})
        name {str} -- name of the layer (default: {None})

    Returns:
        [keras layer] -- [output layer]
    """
    x = Conv2D(filters, (num_row, num_col), strides=strides, padding=padding, use_bias=False)(x)
    x = BatchNormalization(axis=3, scale=False)(x)
    if activation is None:
        return x
    x = Activation(activation, name=name)(x)
    return x
def trans_conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(2, 2), name=None):
    """2D transposed convolutional layer followed by batch normalization.

    Arguments:
        x {keras layer} -- input layer
        filters {int} -- number of filters
        num_row {int} -- number of rows in filters
        num_col {int} -- number of columns in filters

    Keyword Arguments:
        padding {str} -- mode of padding (default: {'same'})
        strides {tuple} -- stride of convolution operation (default: {(2, 2)})
        name {str} -- name of the layer (default: {None})

    Returns:
        [keras layer] -- [output layer]
    """
    x = Conv2DTranspose(filters, (num_row, num_col), strides=strides, padding=padding)(x)
    x = BatchNormalization(axis=3, scale=False)(x)
    return x
def MultiResBlock(U, inp, alpha=1.67):
    """Multi-Res block.

    Arguments:
        U {int} -- number of filters in the corresponding UNet stage
        inp {keras layer} -- input layer

    Returns:
        [keras layer] -- [output layer]
    """
    W = alpha * U
    shortcut = inp
    shortcut = conv2d_bn(shortcut, int(W*0.167) + int(W*0.333) + int(W*0.5),
                         1, 1, activation=None, padding='same')
    conv3x3 = conv2d_bn(inp, int(W*0.167), 3, 3, activation='relu', padding='same')
    conv5x5 = conv2d_bn(conv3x3, int(W*0.333), 3, 3, activation='relu', padding='same')
    conv7x7 = conv2d_bn(conv5x5, int(W*0.5), 3, 3, activation='relu', padding='same')
    out = concatenate([conv3x3, conv5x5, conv7x7], axis=3)
    out = BatchNormalization(axis=3)(out)
    out = add([shortcut, out])
    out = Activation('relu')(out)
    out = BatchNormalization(axis=3)(out)
    return out
def ResPath(filters, length, inp):
    """ResPath: a chain of residual convolutions along a skip connection.

    Arguments:
        filters {int} -- number of filters
        length {int} -- length of the ResPath
        inp {keras layer} -- input layer

    Returns:
        [keras layer] -- [output layer]
    """
    shortcut = inp
    shortcut = conv2d_bn(shortcut, filters, 1, 1, activation=None, padding='same')
    out = conv2d_bn(inp, filters, 3, 3, activation='relu', padding='same')
    out = add([shortcut, out])
    out = Activation('relu')(out)
    out = BatchNormalization(axis=3)(out)
    for i in range(length - 1):
        shortcut = out
        shortcut = conv2d_bn(shortcut, filters, 1, 1, activation=None, padding='same')
        out = conv2d_bn(out, filters, 3, 3, activation='relu', padding='same')
        out = add([shortcut, out])
        out = Activation('relu')(out)
        out = BatchNormalization(axis=3)(out)
    return out
def MultiResUnet(height, width, n_channels):
    """Build the MultiResUNet model.

    Arguments:
        height {int} -- height of image
        width {int} -- width of image
        n_channels {int} -- number of channels in image

    Returns:
        [keras model] -- MultiResUNet model
    """
    inputs = Input((height, width, n_channels))

    # downsampling part begins here
    mresblock1 = MultiResBlock(32, inputs)
    pool1 = MaxPooling2D(pool_size=(2, 2))(mresblock1)
    mresblock1 = ResPath(32, 4, mresblock1)

    mresblock2 = MultiResBlock(32*2, pool1)
    pool2 = MaxPooling2D(pool_size=(2, 2))(mresblock2)
    mresblock2 = ResPath(32*2, 3, mresblock2)

    mresblock3 = MultiResBlock(32*4, pool2)
    pool3 = MaxPooling2D(pool_size=(2, 2))(mresblock3)
    mresblock3 = ResPath(32*4, 2, mresblock3)

    mresblock4 = MultiResBlock(32*8, pool3)

    # upsampling part begins here
    up5 = concatenate([Conv2DTranspose(32*4, (2, 2), strides=(2, 2), padding='same')(mresblock4),
                       mresblock3], axis=3)
    mresblock5 = MultiResBlock(32*8, up5)

    up6 = concatenate([Conv2DTranspose(32*4, (2, 2), strides=(2, 2), padding='same')(mresblock5),
                       mresblock2], axis=3)
    mresblock6 = MultiResBlock(32*4, up6)

    up7 = concatenate([Conv2DTranspose(32*2, (2, 2), strides=(2, 2), padding='same')(mresblock6),
                       mresblock1], axis=3)
    mresblock7 = MultiResBlock(32*2, up7)

    conv8 = conv2d_bn(mresblock7, 1, 1, 1, activation='sigmoid')

    model = Model(inputs=[inputs], outputs=[conv8])
    return model
So now, back to my problem with mismatched input/output dimensions in the U-Net architecture.
If I choose an input height/width of (128, 128), (256, 256) or (512, 512) and do:

model = MultiResUnet(128, 128, 3)
display(model.summary())

TensorFlow gives me a perfect summary of what the whole architecture looks like. Now if I do this:

model = MultiResUnet(36, 36, 3)
display(model.summary())

I get this error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>
----> 1 model = MultiResUnet(36, 36, 3)
      2 display(model.summary())

<ipython-input> in MultiResUnet(height, width, n_channels)
     25
     26     up5 = concatenate([Conv2DTranspose(
---> 27         32*4, (2, 2), strides=(2, 2), padding='same')(mresblock4), mresblock3], axis=3)
     28     mresblock5 = MultiResBlock(32*8, up5)
     29

~/miniconda3/envs/MastersThenv/lib/python3.6/site-packages/tensorflow/python/keras/layers/merge.py in concatenate(inputs, axis, **kwargs)
    682       A tensor, the concatenation of the inputs alongside axis `axis`.
    683     """
--> 684   return Concatenate(axis=axis, **kwargs)(inputs)
    685
    686

~/miniconda3/envs/MastersThenv/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    694     if all(hasattr(x, 'get_shape') for x in input_list):
    695       input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs)
--> 696       self.build(input_shapes)
    697
    698     # Check input assumptions set after layer building, e.g. input shape.

~/miniconda3/envs/MastersThenv/lib/python3.6/site-packages/tensorflow/python/keras/utils/tf_utils.py in wrapper(instance, input_shape)
    146   else:
    147     input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())
--> 148   output_shape = fn(instance, input_shape)
    149   if output_shape is not None:
    150     if isinstance(output_shape, list):

~/miniconda3/envs/MastersThenv/lib/python3.6/site-packages/tensorflow/python/keras/layers/merge.py in build(self, input_shape)
    388                          'inputs with matching shapes '
    389                          'except for the concat axis. '
--> 390                          'Got inputs shapes: %s' % (input_shape))
    391
    392   def _merge_function(self, inputs):

ValueError: A Concatenate layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 8, 8, 128), (None, 9, 9, 128)]
Why does the Conv2DTranspose give me the dimension
(None, 8, 8, 128)
instead of
(None, 9, 9, 128),
and why doesn't the Concatenate layer complain when I choose input sizes like (128, 128), (256, 256), etc. (multiples of 32)?
So, to generalize the question: how can I make this U-Net architecture work with any input size, and how can I deal with the Conv2DTranspose layer producing an output that is one smaller in width/height than the actually needed dimension (when the input size isn't a multiple of 32 or isn't symmetric)? And why doesn't this happen with input sizes that are multiples of 32? And what if I had variable input sizes?
Any help would be highly appreciated.
cheers,
H

The U-Net family of models (such as the MultiResUNet model above) follows an encoder-decoder architecture. The encoder is a down-sampling path that extracts features, whereas the decoder is an upsampling one. Feature maps from the encoder are concatenated with those in the decoder through skip-connections. These feature maps are concatenated along the last axis, the 'channel' axis (considering the features to have dimensions [batch_size, height, width, channels]). Now, for the features to be concatenated along any axis (the 'channel' axis, in our case), the dimensions along all the other axes must match.
In the above model architecture, there are 3 downsampling/max-pooling operations performed (through MaxPooling2D) in the encoder path. In the decoder path, 3 upsampling/transpose-conv operations are performed, aiming to restore the image to its full dimension. However, for the concatenations (through skip-connections) to happen, the height, width & batch_size of the downsampled and upsampled feature maps should remain identical at every "level" of the model. I'll illustrate this with the examples you mentioned in the question:
1st case: input dimensions (128, 128, 3): 128 -> 64 -> 32 -> 16 -> 32 -> 64 -> 128
2nd case: input dimensions (36, 36, 3): 36 -> 18 -> 9 -> 4 -> 8 -> 16 -> 32
In the 2nd case, when the height and width of the feature map reach 9 in the encoder path, further downsampling leads to a dimension change (loss) that cannot be regained in the decoder while upsampling: 9 pools down to 4, but 4 upsamples back to 8, not 9. Hence, it throws an error due to the inability to concatenate feature maps of dimensions (None, 8, 8, 128) & (None, 9, 9, 128).
In general, for a simple encoder-decoder model (with skip-connections) having 'n' downsampling (MaxPooling2D) layers, the input dimension must be a multiple of 2^n to be able to concatenate the model's encoder features at the decoder. In this case n=3, hence the input must be a multiple of 8 to not run into these dimension mismatch errors.
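To make that rule concrete, here is a minimal sketch (the helper check_input_size is my own name, not part of the model code) that pits the floor-division of MaxPooling2D against the doubling of Conv2DTranspose:

def check_input_size(h, n):
    size = h
    for _ in range(n):
        size = size // 2             # MaxPooling2D(pool_size=(2, 2)) floors odd sizes
    return size * (2 ** n) == h      # n stride-2 Conv2DTranspose layers double it back

print(check_input_size(128, 3))  # True  -> all concatenations line up
print(check_input_size(36, 3))   # False -> 36 -> 18 -> 9 -> 4, but 4 upsamples to 8, not 9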
Hope this helps! :)

Thanks @Balraj Ashwath for the great answer! Then, if your input has height/width h and you want to use this architecture with depth d (h >= 2^d), one possibility is to pad the dimension h with delta_h zeros, given by the following expression:

import numpy as np

h, d = 36, 3
delta_h = np.ceil(h / (2**d)) * (2**d) - h
print(delta_h)
> 4.0

Then, following the example of @Balraj Ashwath:
40 -> 20 -> 10 -> 5 -> 10 -> 20 -> 40
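To actually apply that padding inside a Keras model, one possibility (a sketch of my own, assuming zero-padded borders are acceptable for your data) is to wrap the network between ZeroPadding2D and Cropping2D layers:

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, ZeroPadding2D, Cropping2D

h, w, d = 36, 36, 3
pad_h = int(np.ceil(h / 2**d) * 2**d - h)  # 4 extra rows for h=36, d=3
pad_w = int(np.ceil(w / 2**d) * 2**d - w)  # 4 extra columns

inputs = Input((h, w, 3))
padded = ZeroPadding2D(padding=((0, pad_h), (0, pad_w)))(inputs)  # 36x36 -> 40x40
# ... feed `padded` through the MultiResUNet encoder/decoder here ...
cropped = Cropping2D(cropping=((0, pad_h), (0, pad_w)))(padded)   # 40x40 -> 36x36

Cropping the output back at the end keeps the predicted map aligned with the original 36x36 input.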

Related

How to make 2 tensors the same length by mean | median imputation of the shortest tensor?

I'm trying to subclass the base Keras layer to create a layer that will merge the rank-1 outputs of 2 layers of a skip connection by outputting the dot product of the 2 tensors. The 2 incoming tensors are created by Dense layers parsed by a neural architecture search algorithm that randomly selects the number of Dense units, and hence the lengths of the 2 tensors. These of course will usually not be of the same length. I am trying an experiment to see if casting them to the same length, by appending the shorter tensor with a mathematically meaningful imputation (e.g. mean | median | hypotenuse | cos | ... etc.) and then merging them by means of the dot product, will outperform the Add or Concatenate merging strategies. To make them the same length, I try the overall strategy:
1. Find the shorter tensor.
2. Pass it to tf.reduce_mean() (aliasing the resulting mean as "rm" for the sake of discussion).
3. Create a list [rm for _ in range(<difference in length between the longer and the shorter tensor>)]. Cast it as a tensor if necessary.
4. Pad/concatenate the shorter tensor with the result of the operation above to make it equal in length.
Here is where I am running into a dead wall:
Since the tf operation reduce_mean returns a future whose shape is None (not assumed to be a scalar of 1), the padding values end up with a shape of (None,), which the tf.keras.layers.Dot layer refuses to ingest, throwing a ValueError, as it does not see them as being the same length, though they always will be:
KerasTensor(type_spec=TensorSpec(shape=(None,), dtype=tf.float32, name=None), name='tf.math.reduce_mean/Mean:0', description="created by layer 'tf.math.reduce_mean'")
ValueError: A Concatenate layer should be called on a list of at least 1 input. Received: input_shape=[[(None,), (None,)], [(None, 3)]]
My code (in the package/module):
import tensorflow as tf
import numpy as np

class Linear1dDot(tf.keras.layers.Layer):
    def __init__(self, input_dim=None):
        super(Linear1dDot, self).__init__()

    def __call__(self, inputs):
        max_len = tf.reduce_max(tf.Variable(
            [inp.shape[1] for inp in inputs]))
        print(f"max_len: {max_len}")
        for i in range(len(inputs)):
            inp = inputs[i]
            print(inp.shape)
            inp_length = inp.shape[1]
            if inp_length < max_len:
                print(f"{inp_length} < {max_len}")
                # pad_with = inp.reduce_mean()
                pad_with = tf.reduce_mean(inp, axis=1)
                print(pad_with)
                padding = [pad_with for _ in range(max_len - inp_length)]
                inputs[i] = tf.keras.layers.concatenate([padding, [inp]])
                # inputs[i] = tf.reshape(
                #     tf.pad(inp, padding, mode="constant"), (None, max_len))
        print(inputs)
        return tf.keras.layers.Dot(axes=1)(inputs)
...
# Alternatively, substituting the last few lines with:
                pad_with = tf.reduce_mean(inp, axis=1, keepdims=True)
                print(pad_with)
                padding = tf.keras.layers.concatenate(
                    [pad_with for _ in range(max_len - inp_length)])
                inputs[i] = tf.keras.layers.concatenate([padding, [inp]])
                # inputs[i] = tf.reshape(
                #     tf.pad(inp, padding, mode="constant"), (None, max_len))
        print(inputs)
        return tf.keras.layers.Dot(axes=1)(inputs)
... and countless other permutations of attempts ...
Does anyone know a workaround or have any advice (other than "don't try to do this")?
In the parent folder of this module's package, here is a test to simulate a skip connection merging into the current layer:
from linearoneddot.linear_one_d_dot import Linear1dDot
x = tf.constant([1, 2, 3, 4, 5])
y = tf.constant([0, 9, 8])
inp1 = tf.keras.layers.Input(shape=3)
inp2 = tf.keras.layers.Input(shape=5)
xd = tf.keras.layers.Dense(3, "relu")(inp1)
yd = tf.keras.layers.Dense(5, 'elu')(inp2)
combined = Linear1dDot()([xd, yd]) # tf.keras.layers.Dot(axes=1)([xd, yd])
z = tf.keras.layers.Dense(2)(combined)
model = tf.keras.Model(inputs=[inp1, inp2], outputs=z)
print(model([x, y]))
print(model([np.random.random((3, 3)), np.random.random((3, 5))]))
Does anyone know a workaround that will get the mean of the shorter rank-1 tensor as a scalar, which I can then append/pad to the shorter tensor to a set intended length (the same length as the longer tensor)?
Try this; hope it will work. Pad the shorter input with ones, concatenate them with the input, take the dot product, then finally subtract the extra ones which were added in the dot product:
class Linear1dDot(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(Linear1dDot, self).__init__()

    def __call__(self, inputs):
        _input1, _input2 = inputs
        _input1_shape = _input1.shape[1]
        _input2_shape = _input2.shape[1]
        difference = tf.math.abs(_input1_shape - _input2_shape)
        padded_input = tf.ones(shape=(1, difference))
        if _input1_shape > _input2_shape:
            padded_tensor = tf.concat([_input2, padded_input], axis=1)
            scaled_output = tf.keras.layers.Dot(axes=1)([padded_tensor, _input1])
            scaled_output -= tf.reduce_sum(padded_input)
            return scaled_output
        else:
            padded_tensor = tf.concat([_input1, padded_input], axis=1)
            scaled_output = tf.keras.layers.Dot(axes=1)([padded_tensor, _input2])
            scaled_output -= tf.reduce_sum(padded_input)
            return scaled_output
x = tf.constant([[1, 2, 3, 4, 5, 9]])
y = tf.constant([[0, 9, 8]])
inp1 = tf.keras.layers.Input(shape=3)
inp2 = tf.keras.layers.Input(shape=5)
xd = tf.keras.layers.Dense(5, "relu")(x)
yd = tf.keras.layers.Dense(3, 'elu')(y)
combined = Linear1dDot()([xd, yd]) # tf.keras.layers.Dot(axes=1)([xd, yd])
Output:
<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[4.4694786]], dtype=float32)>
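If you specifically want the mean imputation described in the question, a variation of the same idea (my own sketch, not part of the answer above) keeps the mean at shape (batch, 1) via keepdims=True, which avoids the (None,) shape problem, and tiles it instead of padding with ones:

import tensorflow as tf

class MeanPadDot(tf.keras.layers.Layer):
    # Hypothetical variant: pad the shorter tensor with its own mean, then dot.
    def call(self, inputs):
        a, b = inputs
        short, long_ = (a, b) if a.shape[1] <= b.shape[1] else (b, a)
        diff = long_.shape[1] - short.shape[1]
        if diff > 0:
            mean = tf.reduce_mean(short, axis=1, keepdims=True)  # shape (batch, 1)
            short = tf.concat([short, tf.tile(mean, [1, diff])], axis=1)
        return tf.keras.layers.Dot(axes=1)([short, long_])

xd = tf.constant([[1., 2., 3., 4., 5.]])
yd = tf.constant([[0., 9., 8.]])
print(MeanPadDot()([xd, yd]))  # shape (1, 1)

No correction term is needed here because the padding values carry meaningful information rather than placeholder ones.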

Tensorflow - different image size for training and inference

I trained my U-Net architecture on images of size 1024x1024, and now I want to run inference on high-resolution images with different sizes and aspect ratios. But I got this error:
Conv2DSlowBackpropInput: Size of out_backprop doesn't match computed: actual = 64, computed = 32 spatial_dim: 2 input: 64 filter: 3 output: 64 stride: 2 dilation: 1
[[node model_1/sequential_8/conv2d_transpose_8/conv2d_transpose (defined at tmp/ipykernel_40/3635850533.py:1) ]] [Op:__inference_predict_function_9960]
Function call stack:
predict_function
Model architecture:
base_model = tf.keras.applications.MobileNetV2(input_shape=[1024, 1024, 3], include_top=False)

# Use the activations of these layers
layer_names = [
    'block_1_expand_relu',   # 64x64
    'block_3_expand_relu',   # 32x32
    'block_6_expand_relu',   # 16x16
    'block_13_expand_relu',  # 8x8
    'block_16_project',      # 4x4
]
base_model_outputs = [base_model.get_layer(name).output for name in layer_names]

# Create the feature extraction model
down_stack = tf.keras.Model(inputs=base_model.input, outputs=base_model_outputs)
down_stack.trainable = False

up_stack = [
    pix2pix.upsample(512, 3),  # 4x4 -> 8x8
    pix2pix.upsample(256, 3),  # 8x8 -> 16x16
    pix2pix.upsample(128, 3),  # 16x16 -> 32x32
    pix2pix.upsample(64, 3),   # 32x32 -> 64x64
]

def unet_model(output_channels: int):
    inputs = tf.keras.layers.Input(shape=[1024, 1024, 3])

    # Downsampling through the model
    skips = down_stack(inputs)
    x = skips[-1]
    skips = reversed(skips[:-1])

    # Upsampling and establishing the skip connections
    for up, skip in zip(up_stack, skips):
        x = up(x)
        concat = tf.keras.layers.Concatenate()
        x = concat([x, skip])

    # This is the last layer of the model: 64x64 -> 128x128
    last = tf.keras.layers.Conv2DTranspose(
        filters=output_channels, kernel_size=3, strides=2,
        padding='same', activation='sigmoid')
    x = last(x)

    return tf.keras.Model(inputs=inputs, outputs=x)
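No answer was posted for this one, but the error follows the same divisibility logic as the first answer above: the trained graph's Conv2DTranspose layers expect feature maps whose sizes were derived from a 1024x1024 input. One common workaround (a sketch under that assumption, not a verified fix for this exact model) is to resize each inference image to the trained size, predict, then resize the mask back:

import tensorflow as tf

def predict_any_size(model, image, train_size=1024):
    # Hypothetical helper: squeeze the image into the trained input shape,
    # then map the predicted mask back to the original resolution.
    orig_h, orig_w = image.shape[0], image.shape[1]
    resized = tf.image.resize(image, (train_size, train_size))
    mask = model.predict(resized[tf.newaxis, ...])[0]
    return tf.image.resize(mask, (orig_h, orig_w))

Padding to a compatible multiple (as in the MultiResUNet answer above) preserves the aspect ratio better than a plain resize, at the cost of wasted computation on the border.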

Tensorflow CNN model incompatible shape error

I have an array with a shape of [274 documents, 439 equal length sentences per document, 384-dimensional sbert embeddings per sentence]. I'm trying to fit a CNN model that predicts a binary value per document.
Below is the model architecture:
embedding_layer = Embedding(274, 384, input_length=439)
sequence_input = Input(shape=(439,))
embedded_sequences = embedding_layer(sequence_input)
# first conv filter
embedded_sequences = Reshape((439, 384, 1))(embedded_sequences)
x = Conv2D(100, (5, 384), activation='relu')(embedded_sequences)
x = MaxPooling2D((439 - 5 + 1, 1))(x)
# second conv filter
y = Conv2D(100, (4, 384), activation='relu')(embedded_sequences)
y = MaxPooling2D((439 - 4 + 1, 1))(y)
# third conv filter
z = Conv2D(100, (3, 384), activation='relu')(embedded_sequences)
z = MaxPooling2D((439 - 3 + 1, 1))(z)
# concatenate the convolutional layers
alpha = concatenate([x,y,z])
# flatten the concatenated values
alpha = Flatten()(alpha)
# add dropout
alpha = Dropout(0.5)(alpha)
# make predictions
preds = Dense(274, activation='softmax')(alpha)
# build model
model = Model(sequence_input, preds)
adadelta = optimizers.Adadelta()
model.compile(loss='categorical_crossentropy',
              optimizer=adadelta,
              metrics=['acc'])
model.fit(x=X_train_sent_emb_3m, y=y_train_sent_emb_3m, epochs=25,
          validation_data=(X_test_sent_emb_3m, y_test_sent_emb_3m))
The model compiles but when I run the fit call I'm getting the following error message:
Epoch 1/25
WARNING:tensorflow:Model was constructed with shape (None, 439) for input KerasTensor(type_spec=TensorSpec(shape=(None, 439), dtype=tf.float32, name='input_15'), name='input_15', description="created by layer 'input_15'"), but it was called on an input with incompatible shape (None, 439, 384).
...
ValueError: total size of new array must be unchanged, input_shape = [439, 384, 384], output_shape = [439, 384, 1]
Any suggestions on what I need to change to make the model work for the shape of the data?
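No answer was posted here either, but the warning hints at the cause: an Embedding layer maps integer token indices of shape (None, 439) to vectors, while this data already consists of 384-dimensional sbert embeddings of shape (None, 439, 384). A sketch of one possible fix (my reading of the error, not a confirmed solution) is to drop the Embedding layer and declare the input with the embedding dimension:

from tensorflow.keras.layers import Input, Reshape

# Accept the precomputed sentence embeddings directly...
sequence_input = Input(shape=(439, 384))
# ...and add the channel axis that the Conv2D branches expect.
embedded_sequences = Reshape((439, 384, 1))(sequence_input)
# The three Conv2D/MaxPooling2D branches can then stay unchanged.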

Neural network output issue

I built a neural network with TensorFlow; here is the code:
class DQNetwork:
    def __init__(self, state_size, action_size, learning_rate, name='DQNetwork'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with tf.variable_scope(name):
            # We create the placeholders
            self.inputs_ = tf.placeholder(tf.float32, shape=[state_size[1], state_size[0]], name="inputs")
            self.actions_ = tf.placeholder(tf.float32, [None, self.action_size], name="actions_")

            # Remember that target_Q is the R(s,a) + ymax Qhat(s', a')
            self.target_Q = tf.placeholder(tf.float32, [None], name="target")

            self.fc = tf.layers.dense(inputs=self.inputs_,
                                      units=50,
                                      kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                      activation=tf.nn.elu)
            self.output = tf.layers.dense(inputs=self.fc,
                                          units=self.action_size,
                                          kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                          activation=None)

            # Q is our predicted Q value.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_))

            # The loss is the difference between our predicted Q_values and the Q_target
            # Sum(Qtarget - Q)^2
            self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
But I have an issue with the output:
the output should normally be of the same size as "action_size", and action_size's value is 3,
but I got an output like [[5][3]] (i.e. shape (5, 3)) instead of just [[3]], and I really don't understand why...
This network has 2 dense layers, one with 50 perceptrons and the other with 3 perceptrons (= action_size).
state_size has the format [[9][5]].
If someone knows why my output is two-dimensional, I will be very thankful.
Your self.inputs_ placeholder has shape (5, 9). The matmul(self.inputs_, fc1.w) operation in dense layer fc1, whose weight matrix has shape (9, 50), results in shape (5, 50). You then apply another dense layer with weights of shape (50, 3), which results in an output of shape (5, 3).
The same schematically:
matmul(shape(5, 9), shape(9, 50)) ---> shape(5, 50) # output of 1st dense layer
matmul(shape(5, 50), shape(50, 3)) ---> shape(5, 3) # output of 2nd dense layer
Usually, the first dimension of the input placeholder represents the batch size and the second dimension is the size of the input feature vector. So for each sample in the batch (batch size is 5 in your case) you get an output of size 3.
To get probabilities, use this:
import tensorflow as tf
import numpy as np
inputs_ = tf.placeholder(tf.float32, shape=(None, 9))
actions_ = tf.placeholder(tf.float32, shape=(None, 3))
fc = tf.layers.dense(inputs=inputs_, units=2)
output = tf.layers.dense(inputs=fc, units=3)
reduced = tf.reduce_mean(output, axis=0)
probs = tf.nn.softmax(reduced) # <--probabilities
inputs_vals = np.ones((5, 9))
actions_vals = np.ones((1, 3))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(probs.eval({inputs_: inputs_vals,
                      actions_: actions_vals}))
    # [0.01858923 0.01566187 0.9657489 ]

How to get CNN kernel values in Tensorflow

I am using the code below to create CNN layers:

conv1 = tf.layers.conv2d(inputs=input, filters=20, kernel_size=[3, 3],
                         padding="same", activation=tf.nn.relu)

and I want to get the values of all the kernels after training. It does not work if I simply do

kernels = conv1.kernel

So how should I retrieve the values of these kernels? I am also not sure which variables and methods conv2d has, since TensorFlow doesn't really document them in the conv2d class.
You can find all the variables in the list returned by tf.global_variables() and easily look up the variable you need.
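For example, a minimal sketch of that lookup (the 'kernel' substring is an assumption based on the default variable naming of tf.layers.conv2d):

import tensorflow as tf

# Collect every variable whose name mentions 'kernel' (the conv weights by default)
kernel_vars = [v for v in tf.global_variables() if 'kernel' in v.name]
for v in kernel_vars:
    print(v.name, v.shape)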
If you wish to get these variables by name, declare a layer as:
conv_layer_1 = tf.layers.conv2d(activation=tf.nn.relu,
                                filters=10,
                                inputs=input_placeholder,
                                kernel_size=(3, 3),
                                name="conv1",  # NOTE THE NAME
                                padding="same",
                                strides=(1, 1))
Recover the graph as:
gr = tf.get_default_graph()
Recover the kernel values as:
conv1_kernel_val = gr.get_tensor_by_name('conv1/kernel:0').eval()
Recover the bias values as:
conv1_bias_val = gr.get_tensor_by_name('conv1/bias:0').eval()
You mean you want to get the values of the weights for the conv1 layer.
You haven't actually defined the weights with conv2d explicitly; you need to do that. When I create a convolutional layer I use a function that performs all the necessary steps. Here's a copy/paste of the function I use to create each of my convolutional layers:
def _conv_layer(self, name, in_channels, filters, kernel, input_tensor, strides, dtype=tf.float32):
    with tf.variable_scope(name):
        w = tf.get_variable("w", shape=[kernel, kernel, in_channels, filters],
                            initializer=tf.contrib.layers.xavier_initializer_conv2d(), dtype=dtype)
        b = tf.get_variable("b", shape=[filters], initializer=tf.constant_initializer(0.0), dtype=dtype)
        c = tf.nn.conv2d(input_tensor, w, strides, padding='SAME', name=name + "c")
        a = tf.nn.relu(c + b, name=name + "_a")
        print(name + "_a", a.get_shape().as_list(), name + "_w", w.get_shape().as_list(),
              "params", np.prod(w.get_shape().as_list()[1:]) + filters)
        return a, w.get_shape().as_list()
This is what I use to define 5 convolutional layers. This example is straight out of my code, so note that it's 5 convolutional layers stacked without max pooling or anything, with strides of 2 and 5x5 kernels.
conv1_a, _ = self._conv_layer("conv1", 3, 24, 5, self.imgs4d, [1, 2, 2, 1]) # 24.8 MiB/feature -> 540 x 960
conv2_a, _ = self._conv_layer("conv2", 24, 80, 5, conv1_a, [1, 2, 2, 1]) # 6.2 MiB -> 270 x 480
conv3_a, _ = self._conv_layer("conv3", 80, 256, 5, conv2_a, [1, 2, 2, 1]) # 1.5 MiB -> 135 x 240
conv4_a, _ = self._conv_layer("conv4", 256, 750, 5, conv3_a, [1, 2, 2, 1]) # 0.4 MiB -> 68 x 120
conv5_a, _ = self._conv_layer("conv5", 750, 2048, 5, conv4_a, [1, 2, 2, 1]) # 0.1 MiB -> 34 x 60
There's also a good tutorial on the tensorflow website on how to set up a convolutional network:
https://www.tensorflow.org/tutorials/deep_cnn
The direct answer to your question is that the weights for the convolutional layer are defined there as w; that's the tensor you're asking about, if I understand you correctly.
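Once the weights are defined that way, reading the trained values back is just a matter of reusing the variable scope; a minimal sketch, assuming sess is the session you trained in:

# Reuse the scope to grab the same variable object, then fetch its value.
with tf.variable_scope("conv1", reuse=True):
    w = tf.get_variable("w")
kernel_values = sess.run(w)  # numpy array of shape [5, 5, 3, 24]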