Is there a way to divide the keras mobilenetv2 model into submodels? - tensorflow

I am trying to divide the mobilenetv2 model into 2 parts.
I first want to run the first part of the model, save the output, and feed it later on to the second model for certain reasons. I've tried code found here,
but I get the following error:
ValueError: A merge layer should be called on a list of inputs.
I think it is because the model isn't a Sequential.
Can someone help?

As I mentioned in my comments, some layers in mobile_net_v2 expect more than one input, namely the outputs of several previous layers. Adding them one by one to a Sequential model therefore causes errors. I have an alternative solution for you. Using my own mobile_net_v2 implementation (in this link), I was able to create the models you want:
import tensorflow as tf
from tensorflow.keras import layers, Model, Sequential
def conv_block(input_tensor, c, s, t, expand=True):
    """
    Inverted-residual (bottleneck) block used by mobile net v2.

    The block chains an optional 1x1 expansion convolution, a 3x3
    depthwise convolution and a 1x1 projection convolution. Each
    convolution is followed by batch normalization; the first two are
    also followed by ReLU6, the projection is left linear.

    Args:
        input_tensor (keras tensor): feature map entering the block
        c (int): number of channels produced by the projection
        s (int): stride of the depthwise convolution
        t (int): multiplier applied to the input channels when expanding
        expand (bool): run the 1x1 expansion stage or skip it

    Returns: keras tensor; the block input is added back to it when the
        stride is 1 and input and output shapes are equal
    """
    in_channels = input_tensor.get_shape()[-1]

    features = input_tensor
    if expand:
        # 1x1 expansion to in_channels * t filters.
        features = layers.Conv2D(
            filters=in_channels * t,
            kernel_size=1,
            strides=1,
            padding='same',
            use_bias=False,
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.ReLU(6.0)(features)

    # 3x3 depthwise convolution carrying the block's stride.
    features = layers.DepthwiseConv2D(
        kernel_size=3,
        strides=s,
        padding='same',
        depth_multiplier=1,
        use_bias=False,
    )(features)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU(6.0)(features)

    # Linear 1x1 projection down to c channels (no activation afterwards).
    features = layers.Conv2D(
        filters=c,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
    )(features)
    features = layers.BatchNormalization()(features)

    # The skip connection is only possible when nothing changed shape.
    shapes_match = input_tensor.get_shape() == features.get_shape()
    if shapes_match and s == 1:
        return features + input_tensor
    return features
def splitted_model(input_shape=(224,224,3), split_after=10):
    """
    Build mobile net v2 (without classifier) as two chained models.

    Args:
        input_shape (tuple): shape of one input image, without batch axis
        split_after (int): number of bottleneck blocks placed in the first
            model; the remaining blocks go to the second model. The
            default of 10 reproduces the original hard-coded split.

    Returns: tuple (model_f, model_h). model_f maps an image to the
        features after the first `split_after` blocks; model_h takes a
        tensor of that shape and applies the remaining blocks, the final
        1x1 convolution and global average pooling.

    Raises:
        ValueError: if split_after is outside 0..17
    """
    # (output channels, stride, expansion factor, expand?) for each of the
    # 17 bottleneck blocks, in network order.
    block_specs = (
        (16, 1, 1, False),
        (24, 2, 6, True),
        (24, 1, 6, True),
        (32, 2, 6, True),
        (32, 1, 6, True),
        (32, 1, 6, True),
        (64, 2, 6, True),
        (64, 1, 6, True),
        (64, 1, 6, True),
        (64, 1, 6, True),
        (96, 1, 6, True),
        (96, 1, 6, True),
        (96, 1, 6, True),
        (160, 2, 6, True),
        (160, 1, 6, True),
        (160, 1, 6, True),
        (320, 1, 6, True),
    )
    if not 0 <= split_after <= len(block_specs):
        raise ValueError(
            'split_after must be between 0 and %d, got %r'
            % (len(block_specs), split_after)
        )

    # First model: stem convolution plus the leading blocks.
    # (The local is not called `input` so the builtin is not shadowed.)
    image_input = layers.Input(shape=input_shape)
    x = layers.Conv2D(
        32,
        3,
        2,
        padding='same',
        use_bias=False
    )(image_input)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(6.0)(x)
    for c, s, t, expand in block_specs[:split_after]:
        x = conv_block(x, c, s, t, expand=expand)
    model_f = Model(inputs=image_input, outputs=x)

    # Second model: its input has the shape of model_f's output (batch
    # axis dropped), so saved model_f outputs can be fed straight in.
    input_2 = layers.Input(shape=(x.shape[1:]))
    x = input_2
    for c, s, t, expand in block_specs[split_after:]:
        x = conv_block(x, c, s, t, expand=expand)
    x = layers.Conv2D(
        1280,
        1,
        1,
        padding='same',
        use_bias=False
    )(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU(6.0)(x)
    x = layers.GlobalAveragePooling2D()(x)
    model_h = Model(inputs=input_2, outputs=x)

    return model_f, model_h
You could create your two models as such:
# Square, 3-channel input images of side IMG_SIZE.
IMG_SIZE = 160
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
# model_f is the first part of the network; model_h is the second part and
# takes model_f's output as its input.
model_f, model_h = splitted_model(input_shape=IMG_SHAPE)
Note that the weights are randomly initialized. If you want to have the weights from mobilenet_v2 trained on imagenet, you could run the following code to copy weights:
mobile_net = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SHAPE,
    include_top=False,
    weights='imagenet',
)

# Pair layers by their position among the weight-bearing layers only.
# Weightless layers (inputs, paddings, activations, additions) do not line
# up one-to-one between the Keras model and the two halves, so they are
# ignored instead of being stepped over with hand-maintained counters.
pretrained_layers = [layer for layer in mobile_net.layers if layer.weights]
split_layers = [
    layer
    for part in (model_f, model_h)  # first half, then second half
    for layer in part.layers
    if layer.weights
]

# A mismatch means the architectures differ; fail loudly rather than copy
# weights into the wrong layers.
if len(pretrained_layers) != len(split_layers):
    raise ValueError(
        'mobile_net has %d layers with weights but model_f + model_h have %d'
        % (len(pretrained_layers), len(split_layers))
    )

for pretrained, target in zip(pretrained_layers, split_layers):
    print(pretrained.name, 'here', target.name)
    # set_weights itself raises if the weight shapes do not match.
    target.set_weights(pretrained.get_weights())
It iterates through the layers of the mobilenet_v2 model loaded from Keras, copying the weights of the first part to model_f and the rest to model_h. You can check that the weights were copied correctly by printing some layer weights from mobile_net and from the new models, as follows:
print(model_f.layers[1].get_weights()) # printing weights of first conv layer in model_f
print(mobile_net.get_layer('Conv1').get_weights()) # printing weights of first conv layer in mobile_net
Also for model_h:
# layers[-4] is the last Conv2D of model_h: it is followed by
# BatchNormalization, ReLU and GlobalAveragePooling2D.
print(model_h.layers[-4].get_weights()) # printing weights of last conv layer in model_h
print(mobile_net.get_layer('Conv_1').get_weights()) # printing weights of last conv layer in mobile_net
Note that I arbitrarily chose the block at which mobile_net is split into model_f and model_h; you can edit the code to change where the split happens. Hope it helps.

Related

Matrix size-incompatible for custom multi model

I am getting the following error:
Node: 'BGNet/dense/BiasAdd'
Matrix size-incompatible: In[0]: [1120,0], In[1]: [2048,1024]
[[{{node BGNet/dense/BiasAdd}}]] [Op:__inference_train_function_11676]
I found the root in this part of the model:
File "<ipython-input-14-3dcbdf5337b8>", line 69, in call
f = self.dense(f)
This is my custom multi model:
class BGNet(tf.keras.Model):
    """Multi-output model predicting object presence, bounding box and class.

    Seven ``BGBlock`` stages (defined outside this snippet) feed a shared
    flatten + dense layer. Three heads branch off that shared feature:

    * ``prob``  - 1 sigmoid unit
    * ``bbox``  - 4 linear units, computed from the feature and ``prob``
    * ``class`` - softmax over ``classes``, computed from the feature and
      ``bbox``

    Args:
        img_h, img_w, img_c: stored on the instance; not used by the
            layers built here
        batch_size: stored on the instance; not used by the layers built here
        classes: sized collection of classes; its length sets the width of
            the ``cat`` output layer
    """

    def __init__(self, img_h, img_w, img_c, batch_size, classes):
        super().__init__(name='BGNet')
        self.img_h = img_h
        self.img_w = img_w
        self.img_c = img_c
        self.batch_size = batch_size
        self.classes = classes

        # The shape comments below are the author's expected feature-map
        # sizes entering each stage; they are not checked by this code.
        # (224, 224, 3)
        self.bgblock0 = BGBlock(f=[32, 32, 32, 32],
                                k=[7, 5, 5, 5],
                                d=[1, 2, 2, 1],
                                stage=0)
        # (112, 112, 32)
        self.bgblock1 = BGBlock(f=[64, 64, 64, 64],
                                k=[5, 5, 5, 3],
                                d=[2, 1, 1, 2],
                                stage=1)
        # (56, 56, 64)
        self.bgblock2 = BGBlock(f=[128, 128, 128, 128],
                                k=[5, 5, 3, 3],
                                d=[2, 1, 2, 1],
                                stage=2)
        # (28, 28, 128)
        self.bgblock3 = BGBlock(f=[256, 256, 256, 256],
                                k=[5, 3, 3, 3],
                                d=[1, 2, 1, 2],
                                stage=3)
        # (14, 14, 256)
        self.bgblock4 = BGBlock(f=[512, 512, 512],
                                k=[3, 3, 3],
                                d=[1, 1, 2],
                                stage=4)
        # (7, 7, 512)
        self.bgblock5 = BGBlock(f=[1024, 1024, 1024],
                                k=[3, 3, 1],
                                d=[2, 1, 1],
                                stage=5)
        # (4, 4, 1024)
        self.bgblock6 = BGBlock(f=[2048, 2048],
                                k=[1, 1],
                                d=[1, 2],
                                stage=6)
        # (2, 2, 2048)

        # Shared trunk after the convolutional stages.
        self.flatten = tf.keras.layers.Flatten(name='flatten')
        self.dense = tf.keras.layers.Dense(1024, activation='tanh', name='dense')
        self.dropout = tf.keras.layers.Dropout(0.2, name='dropout')

        # Objectness head.
        self.prob = tf.keras.layers.Dense(1, activation='sigmoid', name='prob')

        # Bounding-box head (input: shared feature + prob).
        self.concat1 = tf.keras.layers.Concatenate(axis=-1, name='concat1')
        self.bbox1 = tf.keras.layers.Dense(512, activation='relu', name='bbox1')
        self.bbox2 = tf.keras.layers.Dropout(0.1, name='bbox2')
        self.bbox3 = tf.keras.layers.Dense(256, activation='sigmoid', name='bbox3')
        self.bbox = tf.keras.layers.Dense(4, name='bbox')

        # Class head (input: shared feature + bbox).
        self.concat2 = tf.keras.layers.Concatenate(axis=-1, name='concat2')
        self.cat = tf.keras.layers.Dense(len(self.classes), activation='softmax', name='cat')

    def call(self, input_tensor, training=True):
        """Run the forward pass.

        Args:
            input_tensor: batch of images
            training: forwarded to the dropout layers so the flag is
                honoured even when ``call`` is invoked directly rather
                than through ``__call__``

        Returns:
            dict with keys 'prob', 'bbox' and 'class'
        """
        # NOTE(review): BGBlock is said to contain batch normalization; if
        # its call() accepts `training`, forward it here as well - confirm.
        x = self.bgblock0(input_tensor)
        x = self.bgblock1(x)
        x = self.bgblock2(x)
        x = self.bgblock3(x)
        x = self.bgblock4(x)
        x = self.bgblock5(x)
        x = self.bgblock6(x)

        f = self.flatten(x)
        f = self.dense(f)
        f = self.dropout(f, training=training)

        p = self.prob(f)

        b = self.concat1([f, p])
        b = self.bbox1(b)
        b = self.bbox2(b, training=training)
        b = self.bbox3(b)
        b = self.bbox(b)

        c = self.concat2([f, b])
        c = self.cat(c)

        return {'prob': p, 'bbox': b, 'class': c}
# H, W, C, B and N are not defined in this snippet. From how they are used
# they are presumably image height, width, channels, batch size and the
# collection of classes - confirm against the full notebook.
model1 = BGNet(H, W, C, B, N)
model1.build(input_shape=(B, H, W, C))
# call() is run once on a symbolic input, presumably so that summary() can
# report per-layer output shapes - confirm.
model1.call(tf.keras.layers.Input(shape=(H, W, C), batch_size=B))
model1.summary(print_fn=tf.print, expand_nested=True, show_trainable=True)
The custom blocks (BGBlock) are not that important, but if you are curious, they are convolution blocks consisting of conv2d, batchnorm, activation and pooling layers.
The model produces 3 output vectors of different sizes while sharing the first dense layers. The output layers first predict the confidence score (prob in loss) of an object being in the image. Next they predict the bounding box (bbox in loss) and finally the class (class in loss) of the bounded object.
The main issue is after the flatten layer. The model builds without errors with input images of (224, 224, 3). This is how the summary of the model looks: model.summary() image
I have even created a custom IOU (Intersection Over Union) for bounding boxes to be used as model metric. The losses are simple, inbuilt and as follows:
# One loss per output; the keys match the dict returned by BGNet.call.
loss = {'prob': 'binary_crossentropy', 'bbox': 'mse', 'class': 'categorical_crossentropy'}
How can I resolve this error?

tf.keras.layers.Conv2D get kernel values

How do I get the Kernel values from tf.keras.layers.Conv2D?
Here is my code:
# Input is a batch of one 5 x 5 image with 1 channel, in channels_first
# layout: (batch, channels, height, width).
input_shape = (1, 1, 5, 5)
x = tf.random.normal(input_shape)
# The layer object is created and called in one expression, so y holds the
# result of the convolution and the layer itself is not kept.
y = tf.keras.layers.Conv2D(
2, 2, activation= tf.nn.relu, input_shape=input_shape,
data_format='channels_first')(x)
I am using tf version 2.2
I have tried y.get_weights() and this didn't work I got:
AttributeError: 'tensorflow.python.framework.ops.EagerTensor'
object has no attribute 'get_weights'
You need to actually store the layer in a variable. In your code, y is the result of the convolution. For example
input_shape = (1, 1, 5, 5)
x = tf.random.normal(input_shape)
# Keep a reference to the layer object so its weights can be read later.
conv_layer = tf.keras.layers.Conv2D(
2, 2, activation= tf.nn.relu, input_shape=input_shape,
data_format='channels_first')
# Calling the layer returns the output tensor; the layer stays in conv_layer.
y = conv_layer(x)
Now you should be able to use conv_layer.get_weights().

Copy tensor using K.tile()

I have tensor (None, 196) and after reshaping, it becomes (None, 14, 14).
And now, I want to copy it to channel axis, so that the shape should be (None, 14, 14, 512). Lastly, I want to copy to timestep axis, so it becomes (None, 10, 14, 14, 512). I accomplish those steps using this snippet code:
# Reshape a flat per-sample map to (batch, 1, h, w, 1) and tile it towards
# (batch, nf, h, w, c).
def replicate(tensor, input_target):
batch_size = K.shape(tensor)[0]
# input_target is unpacked as (timesteps, height, width, channels).
nf, h, w, c = input_target
x = K.reshape(tensor, [batch_size, 1, h, w, 1])
# Replicate to channel dimension
# NOTE(review): the first multiple is batch_size, so the batch axis is
# repeated batch_size times here as well.
x = K.tile(x, [batch_size, 1, 1, 1, c])
# Replicate to timesteps dimension
# NOTE(review): the batch axis is repeated batch_size times once more.
x = K.tile(x, [batch_size, nf, 1, 1, 1])
return x
x = ...
# Expand x so it can be multiplied element-wise with another_x.
x = Lambda(replicate, arguments={'input_target':input_shape})(x)
another_x = Input(shape=input_shape) # shape (10, 14, 14, 512)
x = layers.multiply([x, another_x])
x = ...
I plotted the model and the output shape is just what I want it to be. But the problem arises during model training. I set the batch size to 2. This is the error message:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [8,10,14,14,512] vs. [2,10,14,14,512]
[[{{node multiply_1/mul}} = Mul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/multiply_1/mul_grad/Sum"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](Lambda_2/Tile_1, _arg_another_x_0_0/_189)]]
[[{{node metrics/top_k_categorical_accuracy/Mean_1/_265}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_6346_metrics/top_k_categorical_accuracy/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Looks like, K.tile() increases the batch size from 2 to 8. When I set the batch size to 10, it becomes 1000.
So, my question is how to achieve the result as I want? Is it good way to use tile()? Or, should I use repeat_elements()? Thanks!
I am using Tensorflow 1.12.0 and Keras 2.2.4.
As a rule of thumb, try to avoid bringing batch size to the transformations happening in the Lambda layer.
When you use the tile operation, set a multiple other than 1 only for the dimensions that need to change (for example, you had the batch_size value in your tile operation, which is wrong). Also, I am using tf.tile instead of K.tile (it seems TF 1.12 doesn't have tile in the Keras backend).
def replicate(tensor, input_target):
    """Copy a flat per-sample map along the timestep and channel axes.

    `input_target` is unpacked as (batch, timesteps, height, width,
    channels); the batch entry is ignored. The result has shape
    (batch, timesteps, height, width, channels).
    """
    _, nf, h, w, c = input_target
    # -1 lets reshape infer the batch dimension.
    expanded = K.reshape(tensor, [-1, 1, h, w, 1])
    # One tile call replicates both the timestep axis (nf copies) and the
    # channel axis (c copies); the batch multiple stays 1 so the batch
    # size is left untouched.
    return tf.tile(expanded, [1, nf, 1, 1, c])
Simple example
# The leading None stands for the batch axis; replicate() ignores it.
input_shape= [None, 10, 14, 14, 512]
# 196 = 14 * 14 values per sample.
x = Input(shape=(196,))
x = Lambda(replicate, arguments={'input_target':input_shape})(x)
print(x.shape)
Which gives
>>> (?, 10, 14, 14, 512)

Problems with reshape in GAN's discriminator (Tensorflow)

I was trying to implement various GANs in Tensorflow (after doing it successfully in PyTorch), and I am having some problems while coding the discriminator part.
The code of the discriminator (very similar to the MNIST CNN tutorial) is:
def discriminator(x):
"""Compute discriminator score for a batch of input images.
Inputs:
- x: TensorFlow Tensor of flattened input images, shape [batch_size, 784]
Returns:
TensorFlow Tensor with shape [batch_size, 1], containing the score
for an image being real for each input image.
"""
with tf.variable_scope("discriminator"):
# Unflatten each 784-value row into a 28 x 28 single-channel image; the
# batch size is read from the tensor at run time.
x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
# Two stages of 5x5 convolution + leaky ReLU, each followed by 2x2 max
# pooling with stride 2 (32 filters, then 64).
h_1 = leaky_relu(tf.layers.conv2d(x, 32, 5))
m_1 = tf.layers.max_pooling2d(h_1, 2, 2)
h_2 = leaky_relu(tf.layers.conv2d(m_1, 64, 5))
m_2 = tf.layers.max_pooling2d(h_2, 2, 2)
# Flatten, then a dense layer of width 4*4*64 and a single-unit output.
m_2 = tf.contrib.layers.flatten(m_2)
h_3 = leaky_relu(tf.layers.dense(m_2, 4*4*64))
logits = tf.layers.dense(h_3, 1)
return logits
while the code for the generator (architecture of InfoGAN paper) is:
def generator(z):
"""Generate images from a random noise vector.
Inputs:
- z: TensorFlow Tensor of random noise with shape [batch_size, noise_dim]
Returns:
TensorFlow Tensor of generated images, with shape [batch_size, 784].
"""
with tf.variable_scope("generator"):
batch_size = tf.shape(z)[0]
# Two dense layers (1024 and 7*7*128 units), each with ReLU followed by
# batch normalization.
fc = tf.nn.relu(tf.layers.dense(z, 1024))
bn_1 = tf.layers.batch_normalization(fc)
fc_2 = tf.nn.relu(tf.layers.dense(bn_1, 7*7*128))
bn_2 = tf.layers.batch_normalization(fc_2)
# Reshape the 7*7*128 features into a 7 x 7 map with 128 channels.
bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
# Two transposed convolutions with kernel 4 and stride 2; the second has
# a single output channel and is passed through tanh.
c_1 = tf.nn.relu(tf.contrib.layers.convolution2d_transpose(bn_2, 64, 4, 2, padding='valid'))
bn_3 = tf.layers.batch_normalization(c_1)
c_2 = tf.tanh(tf.contrib.layers.convolution2d_transpose(bn_3, 1, 4, 2, padding='valid'))
# NOTE(review): no return statement is visible in this snippet, and c_2 is
# not reshaped to the [batch_size, 784] promised by the docstring - confirm
# against the full notebook.
So far, so good. The number of parameters is correct (checked it). However, I am having some problems in the next block of code:
tf.reset_default_graph()
# number of images for each batch
batch_size = 128
# our noise dimension
noise_dim = 96
# placeholder for images from the training dataset
x = tf.placeholder(tf.float32, [None, 784])
# random noise fed into our generator
z = sample_noise(batch_size, noise_dim)
# generated images
G_sample = generator(z)
# The discriminator is built twice inside one scope: once on real images and,
# after reuse_variables(), once on generated images.
with tf.variable_scope("") as scope:
#scale images to be -1 to 1
logits_real = discriminator(preprocess_img(x))
# Re-use discriminator weights on new inputs
scope.reuse_variables()
logits_fake = discriminator(G_sample)
# Get the list of variables for the discriminator and generator
D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
# get our solver
D_solver, G_solver = get_solvers()
# get our loss
D_loss, G_loss = gan_loss(logits_real, logits_fake)
# setup training steps
# (each solver only updates the variables collected for its own network)
D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
G_train_step = G_solver.minimize(G_loss, var_list=G_vars)
# ops registered in the UPDATE_OPS collection under each scope
D_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'discriminator')
G_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'generator')
The problem I am getting is where I am doing the reshape in the discriminator, and the error says:
ValueError: None values not supported.
Sure, the value for the batch_size is None (btw, I get the same error even when I change it to some number), but the shape function (as far as I understand) should return the dynamic shape, not the static one. I think that I am a bit lost here.
For what it's worth, here is the link to the entire notebook I am working on: https://github.com/TheRevanchist/GANs/blob/master/GANs-TensorFlow.ipynb in case someone wants to look at it.
NB: The code here is part of the Stanford CS231n assignment. I have no affiliation with Stanford though, so it isn't homework cheating (proof: the course is finished months ago).
The generator seems to be the problem. Its output size should match the input size the discriminator expects. The other issue is that batch norm should be applied before the activation unit. I have modified the code:
# Body of the revised generator (the def line is not part of this snippet).
with tf.variable_scope("generator"):
# Project the noise to 4*4*128 features; batch norm comes before the
# activation.
fc = tf.layers.dense(z, 4*4*128)
bn_1 = leaky_relu(tf.layers.batch_normalization(fc))
# -1 lets reshape infer the batch dimension.
bn_1 = tf.reshape(bn_1, [-1, 4, 4, 128])
# Three transposed convolutions (kernel 5, stride 2, 'same' padding) with
# 64, 32 and 1 output channels; batch norm follows each, leaky ReLU follows
# the first two.
c_1 = tf.layers.conv2d_transpose(bn_1, 64, 5, strides=2, padding='same')
bn_2 = leaky_relu(tf.layers.batch_normalization(c_1))
c_2 = tf.layers.conv2d_transpose(bn_2, 32, 5, strides=2, padding='same')
bn_3 = leaky_relu(tf.layers.batch_normalization(c_2))
c_3 = tf.layers.conv2d_transpose(bn_3, 1, 5, strides=2, padding='same')
c_3 = tf.layers.batch_normalization(c_3)
# Resize to 28 x 28, flatten, and apply tanh last.
c_3 = tf.image.resize_images(c_3, (28, 28))
c_3 = tf.contrib.layers.flatten(c_3)
c_3 = tf.tanh(c_3)
return c_3
Your code gives the below output when run with the above changes
Instead of passing None to reshape you must pass -1.
So this:
x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
becomes
x = tf.reshape(x, [-1, 28, 28, 1])
and this:
bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
becomes:
bn_2 = tf.reshape(bn_2, [-1, 7, 7, 128])
It will infer the batch size from the rest of the shape you provided.

How to get CNN kernel values in Tensorflow

I am using the code below to create CNN layers.
conv1 = tf.layers.conv2d(inputs = input, filters = 20, kernel_size = [3,3],
padding = "same", activation = tf.nn.relu)
and I want to get the values of all kernels after training. It does not work if I simply do
kernels = conv1.kernel
So how should I retrieve the values of these kernels? I am also not sure what variables and methods conv2d has, since TensorFlow doesn't really document them in the conv2d class.
You can find all the variables in list returned by tf.global_variables() and easily lookup for variable you need.
If you wish to get these variables by name, declare a layer as:
# The explicit name is the prefix of the tensor names looked up afterwards
# ('conv1/kernel:0' and 'conv1/bias:0').
conv_layer_1 = tf.layers.conv2d(activation=tf.nn.relu,
filters=10,
inputs=input_placeholder,
kernel_size=(3, 3),
name="conv1", # NOTE THE NAME
padding="same",
strides=(1, 1))
Recover the graph as:
gr = tf.get_default_graph()
Recover the kernel values as:
conv1_kernel_val = gr.get_tensor_by_name('conv1/kernel:0').eval()
Recover the bias values as:
conv1_bias_val = gr.get_tensor_by_name('conv1/bias:0').eval()
You mean you want to get the value of the weights for the conv1 layer.
You haven't actually defined the weights with conv2d; you need to do that. When I create a convolutional layer I use a function that performs all the necessary steps. Here's a copy/paste of the function I use to create each of my convolutional layers:
def _conv_layer(self, name, in_channels, filters, kernel, input_tensor, strides, dtype=tf.float32):
    """Create a SAME-padded 2-D convolution with bias and ReLU in scope `name`.

    Args:
        name: variable scope of the layer; also used to name the ops
        in_channels: number of channels of `input_tensor`
        filters: number of output channels
        kernel: side length of the square kernel
        input_tensor: input of the convolution
        strides: strides passed to tf.nn.conv2d
        dtype: dtype of the weight and bias variables

    Returns:
        tuple (activation tensor, weight shape as a list)
    """
    with tf.variable_scope(name):
        # The kernel is stored as the variable "w" and the bias as "b"; these
        # are the tensors to fetch when the trained values are needed.
        w = tf.get_variable("w", shape=[kernel, kernel, in_channels, filters],
                            initializer=tf.contrib.layers.xavier_initializer_conv2d(), dtype=dtype)
        b = tf.get_variable("b", shape=[filters], initializer=tf.constant_initializer(0.0), dtype=dtype)
        c = tf.nn.conv2d(input_tensor, w, strides, padding='SAME', name=name + "c")
        a = tf.nn.relu(c + b, name=name + "_a")
        w_shape = w.get_shape().as_list()
        # print() call instead of the Python 2 print statement. The parameter
        # count covers every kernel dimension plus one bias per filter (the
        # original dropped the first kernel dimension).
        print(name + "_a", a.get_shape().as_list(), name + "_w", w_shape,
              "params", np.prod(w_shape) + filters)
    return a, w_shape
This is what I use to define 5 convolutional layers, this example is straight out of my code, so note that it's 5 convolutional layers stacked without using max pooling or anything, strides of 2 and 5x5 kernels.
# Arguments: scope name, input channels, output channels, kernel size, input
# tensor, strides. Each layer reads the previous layer's activation, and its
# input channel count equals the previous layer's output channel count.
conv1_a, _ = self._conv_layer("conv1", 3, 24, 5, self.imgs4d, [1, 2, 2, 1]) # 24.8 MiB/feature -> 540 x 960
conv2_a, _ = self._conv_layer("conv2", 24, 80, 5, conv1_a, [1, 2, 2, 1]) # 6.2 MiB -> 270 x 480
conv3_a, _ = self._conv_layer("conv3", 80, 256, 5, conv2_a, [1, 2, 2, 1]) # 1.5 MiB -> 135 x 240
conv4_a, _ = self._conv_layer("conv4", 256, 750, 5, conv3_a, [1, 2, 2, 1]) # 0.4 MiB -> 68 x 120
conv5_a, _ = self._conv_layer("conv5", 750, 2048, 5, conv4_a, [1, 2, 2, 1]) # 0.1 MiB -> 34 x 60
There's also a good tutorial on the tensorflow website on how to set up a convolutional network:
https://www.tensorflow.org/tutorials/deep_cnn
The direct answer to your question is that the weights for the convolutional layer are defined there as w, that's the tensor you're asking about if I understand you correctly.