Add extra kernel to a CNN layer while maintaining the weights learned for the other kernels - tensorflow

I'm training a simple feed forward conv neural network on the cifar10 dataset. After running a few epochs, I want to increase the kernel count in the 2nd conv layer from 16 to some number k.
How do I do this while keeping the trained weights for the other kernels and layers in the model intact?
def conv_layer(inp, fltrs):
inp = Conv2D(filters = fltrs, kernel_size = 3, strides = 1, padding = 'valid')(inp)
inp = BatchNormalization()(inp)
inp = Dropout(0.25)(inp)
inp = Activation('relu')(inp)
return inp
inp = Input(shape = (32, 32, 3))
x0 = conv_layer(inp, 8)
x1 = conv_layer(x0, 16)
x2 = MaxPooling2D(pool_size= 2, strides=None, padding='valid', data_format=None)(x1)
x3 = conv_layer(x2, 32)
x4 = conv_layer(x3, 48)
x5 = conv_layer(x4, 64)
x6 = MaxPooling2D(pool_size= 2, strides=None, padding='valid', data_format=None)(x5)
x7 = Flatten()(x6)
x8 = Dense(512)(x7)
x9 = BatchNormalization()(x8)
x10 = Dropout(0.25)(x9)
x11 = Activation('relu')(x10)
x12 = Dense(num_classes, activation='softmax')(x11)
model = Model(inputs = [inp], outputs = [x12])

Related

Problem with classification of flat-color image and image with object (Keras CNN)

I'm using CNN described below from the Keras tutorial to classify two sets of images.
The first set is just a flat color image, and the second set is pictures with an objects.
But can't make CNN to distiguish classes.
How to tune the CNN to distinguish flat-color images and images with objects on it?
(Originally this CNN was designed to distinguish cats and dogs.
https://keras.io/examples/vision/image_classification_from_scratch/#train-the-model)
Flat-color image:
Images with object in it:
def make_model(input_shape, num_classes):
inputs = keras.Input(shape=input_shape)
# Image augmentation block
x = data_augmentation(inputs)
# Entry block
x = layers.Rescaling(1.0 / 255)(x)
x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.Conv2D(64, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
previous_block_activation = x # Set aside residual
for size in [128, 256, 512, 728]:
x = layers.Activation("relu")(x)
x = layers.SeparableConv2D(size, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.SeparableConv2D(size, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
# Project residual
residual = layers.Conv2D(size, 1, strides=2, padding="same")(
previous_block_activation
)
x = layers.add([x, residual]) # Add back residual
previous_block_activation = x # Set aside next residual
x = layers.SeparableConv2D(1024, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.GlobalAveragePooling2D()(x)
if num_classes == 2:
activation = "sigmoid"
units = 1
else:
activation = "softmax"
units = num_classes
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(units, activation=activation)(x)
return keras.Model(inputs, outputs)
model = make_model(input_shape=image_size + (3,), num_classes=2)

('Input has undefined rank:', TensorShape(None)) <- Error in Building ResNet

I am getting an undefined rank error, while building ResNet; I have mentioned a reproducible code below:
Here is my Identity Block:
def IdentityBlock(X, f, filters):
F1, F2, F3 = filters
X_shortcut = X
X = Conv2D(filters = F1, kernel_size = (3, 3), padding = 'valid')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F2, kernel_size = (f, f), padding = 'same')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F3, kernel_size = (3, 3), padding = 'same')(X)
X = BatchNormalization()(X)
X = Add()([X, X_shortcut])
X = Activation('relu')(X)
return X
Here is my Convolution Block
def ConvBlock(X, f, filters):
F1, F2, F3 = filters
X_shortcut = X
X = Conv2D(filters = F1, kernel_size = (3, 3), padding = 'valid')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F2, kernel_size = (f, f), padding = 'same')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F3, kernel_size = (3, 3), padding = 'same')(X)
X = BatchNormalization()(X)
X_shortcut = Conv2D(filters = F3, kernel_size = (3, 3), padding = 'same')
X_shortcut = BatchNormalization()(X_shortcut)
X = Add()([X, X_shortcut])
X = Activation('relu')(X)
return X
And my resnet model:
def ResNet(input_shape = (224, 224, 3)):
X_input = Input(input_shape)
X = Conv2D(64, (7, 7))(X_input)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = MaxPooling2D((3, 3))(X)
X = ConvBlock(X, f = 3, filters = [64, 64, 128])
X = IdentityBlock(X, 3, filters = [64, 64, 128])
X = IdentityBlock(X, 3, filters = [64, 64, 128])
X = ConvBlock(X, f = 3, filters = [128, 128, 512])
X = IdentityBlock(X, 3, filters = [128, 128, 512])
X = IdentityBlock(X, 3, filters = [128, 128, 512])
X = IdentityBlock(X, 3, filters = [128, 128, 512])
X = MaxPooling2D((2, 2))(X)
model = Model(input = X_input, output = X)
return model
When I call RenNet Like this:
base_model = ResNet50(input_shape=(224, 224, 3))
I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-22-f81766b0bb4e> in <module>
----> 1 base_model = ResNet50(input_shape=(224, 224, 3))
<ipython-input-21-309ae6f634f4> in ResNet50(input_shape)
7 X = MaxPooling2D((3, 3))(X)
8
----> 9 X = ConvBlock(X, f = 3, filters = [64, 64, 128])
10 X = IdentityBlock(X, 3, filters = [64, 64, 128])
11 X = IdentityBlock(X, 3, filters = [64, 64, 128])
<ipython-input-20-aeab857c5df6> in ConvBlock(X, f, filters)
16
17 X_shortcut = Conv2D(filters = F3, kernel_size = (3, 3), padding = 'same')
---> 18 X_shortcut = BatchNormalization()(X_shortcut)
19
20 X = Add()([X, X_shortcut])
.
.
.
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/normalization.py in build(self, input_shape)
296 input_shape = tensor_shape.TensorShape(input_shape)
297 if not input_shape.ndims:
--> 298 raise ValueError('Input has undefined rank:', input_shape)
299 ndims = len(input_shape)
300
ValueError: ('Input has undefined rank:', TensorShape(None))
You are passing the Conv2D operation to the BatchNormalization Layer instead of a tensor. Try changing
X_shortcut = Conv2D(filters = F3, kernel_size = (3, 3), padding = 'same')
to
X_shortcut = Conv2D(filters = F3, kernel_size = (3, 3), padding = 'same')(X_shortcut)
These were the steps I followed to reach the final solution:
1 - the add function works like add([x,x_shortcut]) , im not too sure about the other way.
2 - Made a change in the variable, instead of using X in the IdentityBlock Function I changed it to input_
3 - The shape of the inputs in the add layer we not in the desired way, so I
played around with the kernel size and stride values (not too good at these)
to make that layer work. Did this for the ConvBlock and IdentityBlock.
4 - Final small error was when calling the keras model its Model(inputs = , outputs = ), input and output don't work.
def IdentityBlock(input_, f, filters):
F1, F2, F3 = filters
X = Conv2D(filters = F1, kernel_size = (1, 1), padding = 'valid') (input_)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F2, kernel_size = (f, f), padding = 'same')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F3, kernel_size = (1, 1), padding = 'same')(X)
X = BatchNormalization()(X)
X = add([X, input_])
X = Activation('relu')(X)
return X
def ConvBlock(X, f, filters):
F1, F2, F3 = filters
X_shortcut = X
X = Conv2D(filters = F1, kernel_size = (1, 1),strides = (1,1), padding
= 'valid')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F2, kernel_size = (f, f), padding = 'same')(X)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = Conv2D(filters = F3, kernel_size = (1, 1), padding = 'same')(X)
X = BatchNormalization()(X)
X_shortcut = Conv2D(filters = F3,padding = 'same' , kernel_size = (1,1)
, strides = (1,1))(X_shortcut)
X_shortcut = BatchNormalization()(X_shortcut)
X = add([X, X_shortcut])
X = Activation('relu')(X)
return X
def ResNet(input_shape = (224, 224, 3)):
X_input = Input(input_shape)
X = ZeroPadding2D((3,3))(X_input)
X = Conv2D(64, (7, 7) , strides = (2,2))(X_input)
X = BatchNormalization()(X)
X = Activation('relu')(X)
X = ZeroPadding2D((1,1))(X)
X = MaxPooling2D((3, 3) , strides = (2,2))(X)
X = ConvBlock(X, f = 3, filters = [64, 64, 128])
X = IdentityBlock(X, 3, filters = [64, 64, 128])
X = IdentityBlock(X, 3, filters = [64, 64, 128])
X = ConvBlock(X, f = 3, filters = [128, 128, 512])
X = IdentityBlock(X, 3, filters = [128, 128, 512])
X = IdentityBlock(X, 3, filters = [128, 128, 512])
X = IdentityBlock(X, 3, filters = [128, 128, 512])
model = Model(inputs = X_input, outputs = X)
I hope it helped!

TensorFlow get stuck after use concatenate layer

I have the next model:
import tensorflow as tf
input1 = tf.keras.layers.Input(shape = (10, 300, 1))
input2 = tf.keras.layers.Input(shape = (24, ))
x = tf.keras.layers.Conv2D(64, (3,3), activation='relu')(input1)
x = tf.keras.layers.MaxPooling2D(2,2)(x)
x = tf.keras.layers.Dropout(0.25)(x)
x = tf.keras.layers.Conv2D(128, (2,2), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D(2,2)(x)
x = tf.keras.layers.Dropout(0.25)(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(512, activation = 'relu')(x)
x = tf.keras.layers.Dropout(0.25)(x)
x = tf.keras.layers.Concatenate()([x, input2])
x = tf.keras.layers.Dense(128, activation = 'relu')(x)
x = tf.keras.layers.Dropout(0.25)(x)
output = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.models.Model(inputs = [input1,input2], outputs = output)
model.summary()
model.compile(optimizer = 'rmsprop',
loss ='binary_crossentropy',
metrics = ['acc'])
history = model.fit([X_train, X_features], y_train,
batch_size=64,
epochs=100)
But when I try to fit it, get stuck and only appears Epoch 1/100 and nothing more happens even if i let it run for hours. But when I remove the concatenate layer, everything go well. I'm using Google colab. Why is this happening?

How to avoid dying weights/gradients in custom LSTM cell in tensorflow. What shall be ideal loss function?

I am trying to train a name generation LSTM network. I am not using pre-defined tensorflow cells (like tf.contrib.rnn.BasicLSTMCell, etc). I have created LSTM cell myself. But the error is not reducing beyond a limit. It only decreases 30% from what it is initially (when random weights were used in forward propagation) and then it starts increasing. Also, the gradients and weights become very small after few thousand training steps.
I think the reason for non-convergence can be one of two:
1. The design of tensorflow graph i have created OR
2. The loss function i used.
I am feeding one hot vectors of each character of the word for each time-step of the network. The code i have used for graph generation and loss function is as follows. Tx is the number of time steps in RNN, n_x,n_a,n_y are length of the input vectors, LSTM cell vector and output vector respectively.
Will be great if someone can help me in identifying what i am doing wrong here.
n_x = vocab_size
n_y = vocab_size
n_a = 100
Tx = 50
Ty = Tx
with open("trainingnames_file.txt") as f:
examples = f.readlines()
examples = [x.lower().strip() for x in examples]
X0 = [[char_to_ix[x1] for x1 in list(x)] for x in examples]
X1 = np.array([np.concatenate([np.array(x), np.zeros([Tx-len(x)])]) for x in X0], dtype=np.int32).T
Y0 = [(x[1:] + [char_to_ix["\n"]]) for x in X0]
Y1 = np.array([np.concatenate([np.array(y), np.zeros([Ty-len(y)])]) for y in Y0], dtype=np.int32).T
m = len(X0)
Wf = tf.get_variable(name="Wf", shape = [n_a,(n_a+n_x)])
Wu = tf.get_variable(name="Wu", shape = [n_a,(n_a+n_x)])
Wc = tf.get_variable(name="Wc", shape = [n_a,(n_a+n_x)])
Wo = tf.get_variable(name="Wo", shape = [n_a,(n_a+n_x)])
Wy = tf.get_variable(name="Wy", shape = [n_y,n_a])
bf = tf.get_variable(name="bf", shape = [n_a,1])
bu = tf.get_variable(name="bu", shape = [n_a,1])
bc = tf.get_variable(name="bc", shape = [n_a,1])
bo = tf.get_variable(name="bo", shape = [n_a,1])
by = tf.get_variable(name="by", shape = [n_y,1])
X_input = tf.placeholder(dtype = tf.int32, shape = [Tx,None])
Y_input = tf.placeholder(dtype = tf.int32, shape = [Ty,None])
X = tf.one_hot(X_input, axis = 0, depth = n_x)
Y = tf.one_hot(Y_input, axis = 0, depth = n_y)
X.shape
a_prev = tf.zeros(shape = [n_a,m])
c_prev = tf.zeros(shape = [n_a,m])
a_all = []
c_all = []
for i in range(Tx):
ac = tf.concat([a_prev,tf.squeeze(tf.slice(input_=X,begin=[0,i,0],size=[n_x,1,m]))], axis=0)
ct = tf.tanh(tf.matmul(Wc,ac) + bc)
tug = tf.sigmoid(tf.matmul(Wu,ac) + bu)
tfg = tf.sigmoid(tf.matmul(Wf,ac) + bf)
tog = tf.sigmoid(tf.matmul(Wo,ac) + bo)
c = tf.multiply(tug,ct) + tf.multiply(tfg,c_prev)
a = tf.multiply(tog,tf.tanh(c))
y = tf.nn.softmax(tf.matmul(Wy,a) + by, axis = 0)
a_all.append(a)
c_all.append(c)
a_prev = a
c_prev = c
y_ex = tf.expand_dims(y,axis=1)
if i == 0:
y_all = y_ex
else:
y_all = tf.concat([y_all,y_ex], axis=1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y,logits=y_all,dim=0))
opt = tf.train.AdamOptimizer()
train = opt.minimize(loss)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
o = sess.run(loss, feed_dict = {X_input:X1,Y_input:Y1})
print(o.shape)
print(o)
sess.run(train, feed_dict = {X_input:X1,Y_input:Y1})
o = sess.run(loss, feed_dict = {X_input:X1,Y_input:Y1})
print(o)

Tensorflow - ValueError: Cannot feed value of shape

I have 19 input integer features. Output and labels is 1 or 0. I examine MNIST example from tensorflow website.
My code is here :
validation_images, validation_labels, train_images, train_labels = ld.read_data_set()
print "\n"
print len(train_images[0])
print len(train_labels)
import tensorflow as tf
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 19])
y_ = tf.placeholder(tf.float32, shape=[None, 2])
W = tf.Variable(tf.zeros([19,2]))
b = tf.Variable(tf.zeros([2]))
sess.run(tf.initialize_all_variables())
y = tf.nn.softmax(tf.matmul(x,W) + b)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
start = 0
batch_1 = 50
end = 100
for i in range(1000):
#batch = mnist.train.next_batch(50)
x1 = train_images[start:end]
y1 = train_labels[start:end]
start = start + batch_1
end = end + batch_1
x1 = np.reshape(x1, (-1, 19))
y1 = np.reshape(y1, (-1, 2))
train_step.run(feed_dict={x: x1[0], y_: y1[0]})
I run upper code, I get an error. The compiler says that
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (19,) for Tensor u'Placeholder:0', which has shape '(?, 19)'
How can I handle this error?
Try
train_step.run(feed_dict={x: x1, y_: y1})
You can reshape your feed's value by the following code:
x1 = np.column_stack((x1))
x1 = np.transpose(x1) # if necessary
Thus, the shape of the input value will be (1, 19) instead of (19,)