I am trying to create a CNN model for image classification using hyperparameter tuning. When I run the code I receive the following error:
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 32, 32, 32, 3), found shape=(32, 32, 32, 3)
How do I fix this error? Here is the whole code, pasted below:
# imports (assumed aliases, not included in the original snippet, added so the example runs):
from tensorflow import keras as ks
from tensorflow.keras import layers
import keras_tuner

# first we create our actual code which requires the arguments, units, activation, dropout, lr:
def build_model(hp):
    model = ks.Sequential([
        # adding first conv2d layer
        ks.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        ),
        # adding second conv2d layer
        ks.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"]),
            input_shape=(32, 32, 32, 3)
        )])
    model.add(layers.Flatten())
    # Let's tune the number of Dense layers.
    for i in range(hp.Int("num_dense_layers", 1, 3)):
        model.add(
            layers.Dense(
                # Let's tune the number of units separately
                units=hp.Int(f"units_{i}", min_value=1, max_value=100, step=16),
                activation=hp.Choice("activation", ["relu", "tanh", "softmax"])
            ))
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=ks.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

build_model(keras_tuner.HyperParameters())
You are getting this error due to an input shape mismatch: input_shape should describe a single sample without the batch dimension, so for 32x32 RGB images it should be (32, 32, 3), not (32, 32, 32, 3). With (32, 32, 32, 3) the model expects 5-D batches of shape (None, 32, 32, 32, 3), while your data is (32, 32, 32, 3), i.e. a batch of 32 images.
Here I have implemented the hypermodel on the Fashion-MNIST dataset, which contains images of shape (28, 28, 1).
def build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(28, 28, 1)),
        # adding first conv2d layer
        tf.keras.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_1_filter", min_value=1, max_value=100, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation=hp.Choice("conv_1_activation", ["relu", "tanh", "softmax"]),
            input_shape=(28, 28, 1)
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_1', values=[2, 3])),
        # adding second conv2d layer
        tf.keras.layers.Conv2D(
            # Let's tune the filters, kernel_size, activation function.
            filters=hp.Int("conv_2_filter", min_value=1, max_value=50, step=16),
            kernel_size=hp.Choice("conv_2_kernel", values=[3, 5]),
            activation=hp.Choice("conv_2_activation", ["relu", "tanh", "softmax"]),
            input_shape=(28, 28, 1)
        ),
        tf.keras.layers.MaxPooling2D(
            pool_size=hp.Choice('pooling_2', values=[2, 3])),
    ])
    model.add(tf.keras.layers.Flatten())
    if hp.Boolean("dropout"):
        model.add(tf.keras.layers.Dropout(rate=0.25))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model
By providing the correct input shape you will not get the error.
For more details, please refer to this gist and this documentation. Thank you!
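For completeness, here is a minimal sketch of how the corrected build_model (with input_shape=(32, 32, 3) for 32x32 RGB images) could be driven by a tuner. The CIFAR-10 loader and the RandomSearch settings below are my own example values, not part of the original code:
import tensorflow as tf
import keras_tuner

# Example data: CIFAR-10 gives 32x32 RGB images, matching input_shape=(32, 32, 3).
(x_train, y_train), _ = tf.keras.datasets.cifar10.load_data()
x_train = x_train.astype("float32") / 255.0
y_train = tf.keras.utils.to_categorical(y_train, 10)  # one-hot, since the loss is categorical_crossentropy

tuner = keras_tuner.RandomSearch(
    build_model,               # the build_model defined above, with the fixed input shape
    objective="val_accuracy",
    max_trials=3,
    overwrite=True,
)
tuner.search(x_train, y_train, validation_split=0.2, epochs=2)
best_model = tuner.get_best_models(1)[0]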
Related
I'm using Keras to implement a functional CNN model where I have images of size 64x64x1, with 6 convolutional layers like this:
num_classes = 5

def get_model():
    ## creating CNN functional api for learning
    input_ = keras.layers.Input(shape=[64, 64, 1])
    ## first layer of convolutional layer
    Conv1 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(input_)
    # second convolutional layer
    Conv12 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(Conv1)
    # third convolutional layer
    Conv13 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)(Conv12)
    ## i add max pooling with a stride of 2
    Max1 = keras.layers.MaxPool2D(2, strides=2)(Conv13)
    ## i add a second block of convolutional layers
    Conv2 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Max1)
    ## adding second convolutional layer
    Conv21 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Conv2)
    ## adding third convolutional layer
    Conv23 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)(Conv21)
    ## i add another layer of max pooling
    Max2 = keras.layers.MaxPool2D(2, strides=2)(Conv23)
    ## here i execute data flattening, i will change this to use attention layer Att.
    Flat = keras.layers.Flatten()(Max2)
    # i add another dense architecture
    Dense1 = keras.layers.Dense(2048, activation=tf.nn.relu)(Flat)
    Dense2 = keras.layers.Dense(700, activation=tf.nn.relu)(Dense1)
    # i add now the output layer with softmax
    # Output layer, class prediction.
    output = keras.layers.Dense(num_classes, activation=tf.nn.softmax)(Dense2)
    model = Model(inputs=input_, outputs=output)
    ## end of creating CNN using functional api
    ## defining loss function, training data and epochs. I modify the optimizer to rmsprop
    optimizer_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, epsilon=1e-08, decay=0.0)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer_rmsprop, metrics=["accuracy"])
    ### i return the model
    return model
To get better performance I want to add this attention layer to the above CNN:
# already imported
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from keras.layers import Dropout

# Variable-length int sequences. 64*64*1
query_input = tf.keras.Input(shape=(4096,), dtype='int32')
value_input = tf.keras.Input(shape=(4096,), dtype='int32')
# Embedding lookup.
token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)
# Query embeddings of shape [batch_size, Tq, dimension].
query_embeddings = token_embedding(query_input)
# Value embeddings of shape [batch_size, Tv, dimension].
value_embeddings = token_embedding(value_input)
# CNN layer.
cnn_layer = tf.keras.layers.Conv1D(
    filters=100,
    kernel_size=4,
    # Use 'same' padding so outputs have the same shape as inputs.
    padding='same')
# Query encoding of shape [batch_size, Tq, filters].
query_seq_encoding = cnn_layer(query_embeddings)
# Value encoding of shape [batch_size, Tv, filters].
value_seq_encoding = cnn_layer(value_embeddings)
# Query-value attention of shape [batch_size, Tq, filters].
query_value_attention_seq = tf.keras.layers.Attention()(
    [query_seq_encoding, value_seq_encoding])
# Reduce over the sequence axis to produce encodings of shape
# [batch_size, filters].
query_encoding = tf.keras.layers.GlobalAveragePooling1D()(
    query_seq_encoding)
query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
    query_value_attention_seq)
# Concatenate query and document encodings to produce a DNN input layer.
input_layer = tf.keras.layers.Concatenate()(
    [query_encoding, query_value_attention])
but the problem is that I don't know how to link the attention layer with my CNN model, because when I connect the first convolutional layer with the attention layer like this:
Conv1 = keras.layers.Conv2D(32, kernel_size=5)(input_layer)
I get this error:
ValueError: Input 0 of layer "conv2d" is incompatible with the layer: expected min_ndim=4, found ndim=2. Full shape received: (None, 200)
Can someone show me how to add an attention layer to the CNN model?
Updated Answer...
# imports added for completeness
import numpy as np
import tensorflow as tf
from tensorflow import keras

keras.backend.clear_session()

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, output_dims):
        super(AttentionLayer, self).__init__()
        self.output_dims = output_dims
        self.embeddings = tf.keras.layers.Embedding(input_dim=4096, output_dim=output_dims)
        self.conv = tf.keras.layers.Conv1D(2048, 4, padding='same')
        self.attn_layer = tf.keras.layers.Attention()
        self.global_pooling_1 = tf.keras.layers.GlobalAveragePooling1D()
        self.global_pooling_2 = tf.keras.layers.GlobalAveragePooling1D()

    def call(self, query_input, value_input):
        batch_size = tf.shape(query_input)[0]
        query_input = tf.reshape(query_input, (batch_size, 4096))
        value_input = tf.reshape(value_input, (batch_size, 4096))
        # Query embeddings of shape [batch_size, Tq, dimension].
        query_embeddings = self.embeddings(query_input)
        # Value embeddings of shape [batch_size, Tv, dimension].
        value_embeddings = self.embeddings(value_input)
        # Query encoding of shape [batch_size, Tq, filters].
        query_seq_encoding = self.conv(query_embeddings)
        # Value encoding of shape [batch_size, Tv, filters].
        value_seq_encoding = self.conv(value_embeddings)
        # Query-value attention of shape [batch_size, Tq, filters].
        query_value_attention_seq = self.attn_layer(
            [query_seq_encoding, value_seq_encoding])
        # Reduce over the sequence axis to produce encodings of shape
        # [batch_size, filters].
        query_encoding = self.global_pooling_1(query_seq_encoding)
        query_value_attention = self.global_pooling_2(query_value_attention_seq)
        # Concatenate query and document encodings to produce a DNN input layer.
        input_layer = tf.keras.layers.Concatenate()(
            [query_encoding, query_value_attention])
        input_layer = tf.reshape(input_layer, (batch_size, 64, 64, 1))
        return input_layer
keras.backend.clear_session()
num_classes = 5

class Model(tf.keras.Model):
    def __init__(self):
        super(Model, self).__init__()
        self.attn_layer = AttentionLayer(64)
        ## first layer of convolutional layer
        self.Conv1 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)
        # second convolutional layer
        self.Conv12 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)
        # third convolutional layer
        self.Conv13 = keras.layers.Conv2D(32, kernel_size=5, activation=tf.nn.relu)
        ## i add max pooling with a stride of 2
        self.Max1 = keras.layers.MaxPool2D(2, strides=2)
        ## i add a second block of convolutional layers
        self.Conv2 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)
        ## adding second convolutional layer
        self.Conv21 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)
        ## adding third convolutional layer
        self.Conv23 = keras.layers.Conv2D(64, kernel_size=5, activation=tf.nn.relu)
        ## i add another layer of max pooling
        self.Max2 = keras.layers.MaxPool2D(2, strides=2)
        ## here i execute data flattening, i will change this to use attention layer Att.
        self.Flat = keras.layers.Flatten()
        # i add another dense architecture
        self.Dense1 = keras.layers.Dense(2048, activation=tf.nn.relu)
        self.Dense2 = keras.layers.Dense(700, activation=tf.nn.relu)
        # i add now the output layer with softmax
        # Output layer, class prediction.
        self.outputs = keras.layers.Dense(num_classes, activation=tf.nn.softmax)

    def call(self, x):
        x = self.attn_layer(x, x)
        x = self.Conv1(x)
        x = self.Conv12(x)
        x = self.Conv13(x)
        x = self.Max1(x)
        x = self.Conv2(x)
        x = self.Conv21(x)
        x = self.Conv23(x)
        x = self.Max2(x)
        x = self.Flat(x)
        x = self.Dense1(x)
        x = self.Dense2(x)
        return self.outputs(x)

model = Model()
x = np.random.randint(0, 2, size=(8, 64, 64, 1))
y = np.random.randint(0, 5, size=(8, 1))
print(model(x).shape)

optimize_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, epsilon=1e-08, decay=0.0)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimize_rmsprop, metrics=["accuracy"])
model.fit(x, y, epochs=1)
I am trying to convert a Keras functional model into a class derived from tensorflow.keras.models.Model, and I'm facing 2 issues.
1. I need to multiply 2 layers using tensorflow.keras.layers.multiply, but it returns a ValueError: A merge layer should be called on a list of inputs.
2. If I remove this layer, thus working with a classical CNN, it returns a tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'patch:0' shape=(None, 64, 64, 3) dtype=float32>].
I would appreciate some guidance converting my code. I'm using Python 3.7, TensorFlow 2.0rc2 and Keras 2.3.0. The class I have defined is the following:
# Imports assumed from context; TestLayer and TestSigmoid are my own custom layers, defined elsewhere.
import tensorflow
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, multiply
from tensorflow.keras.models import Model

class TestCNN(Model):
    """
    conv1 > conv2 > fc1 > fc2 > alpha * fc2 > Sigmoid > output
    """
    def __init__(self, input_dimension, n_category, **kwargs):
        """
        Instantiator
        :param input_dimension: tuple of int, theoretically (patch_size x patch_size x channels)
        :param n_category: int, the number of categories to classify,
        :param weight_decay: float, weight decay parameter for all the kernel regularizers
        :return: the Keras model
        """
        super(TestCNN, self).__init__(name='testcnn', **kwargs)
        self.input_dimension = input_dimension
        self.n_category = n_category
        self.conv1 = Conv2D(36, activation='relu', name='conv1/relu')
        self.conv1_maxpooling = MaxPooling2D((2, 2), name='conv1/maxpooling')
        self.conv2 = Conv2D(48, activation='relu', name='conv2/relu')
        self.conv2_maxpooling = MaxPooling2D((2, 2), name='conv2/maxpooling')
        self.flatten1 = Flatten(name='flatten1')
        self.fc1 = Dense(512, activation='relu', name='fc1/relu')
        self.fc2 = Dense(512, activation='relu', name='fc2/relu')
        self.alpha = TestLayer(layer_dim=128, name='alpha')
        self.output1 = TestSigmoid(output_dimension=n_category, name='output_layer')

    @tensorflow.function
    def call(self, x):
        x = self.conv1(x)
        x = self.conv1_maxpooling(x)
        x = self.conv2(x)
        x = self.conv2_maxpooling(x)
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.fc2(x)
        alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
        return self.output1(alpha_times_fc2)

    def build(self, **kwargs):
        inputs = Input(shape=self.input_dimension, dtype='float32', name='patch')
        outputs = self.call(inputs)
        super(TestCNN, self).__init__(name="TestCNN", inputs=inputs, outputs=outputs, **kwargs)
Then, in my main loop, I'm creating the instance as follows:
testcnn = TestCNN(input_dimension=input_dimension, n_category=training_set.category_count)
optimizer = tensorflow.keras.optimizers.Adam(
    lr=parameter['training']['adam']['learning_rate'],
    beta_1=parameter['training']['adam']['beta1'],
    beta_2=parameter['training']['adam']['beta2'])
metrics_list = [tensorflow.keras.metrics.TruePositives]
loss_function = tensorflow.keras.losses.categorical_crossentropy
loss_metrics = tensorflow.keras.metrics.Mean()
testcnn.build()
testcnn.summary()
This code raises the tensorflow.python.eager.core._SymbolicException. If I comment out some lines and return the results of the fc2 layer directly, I get the ValueError instead.
I have since commented out the build() override in my model and now call build() from my main script as follows:
testcnn.build(input_dimension)
testcnn.compile(optimizer=adam_optimizer, loss=loss_function, metrics=metrics_list)
testcnn.summary()
input_dimension is a tuple formatted as follows:
input_dimension = (batch_size, image_size, image_size, channels)
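As a side note on the first error above: the Keras merge helpers expect one list of tensors. Here is a minimal, self-contained sketch of the expected multiply usage (the 512-unit shapes are placeholders mirroring fc2, not taken from the original code):
import tensorflow as tf

# Two tensors of the same shape, standing in for alpha's output and fc2's output.
a = tf.keras.Input(shape=(512,))
b = tf.keras.Input(shape=(512,))

# multiply() must be called on a single list of tensors; calling the layer on
# one tensor (or on separate arguments) raises "A merge layer should be called
# on a list of inputs".
prod = tf.keras.layers.multiply([a, b])

model = tf.keras.Model(inputs=[a, b], outputs=prod)
print(model.output_shape)  # (None, 512)
Inside TestCNN.call() the same idea would mean keeping the fc2 tensor in a variable and passing it together with the alpha tensor as one list.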
def head(self, input, num_anchors, name, flatten=False):
    out_channels = (self.num_classes + 4) * num_anchors
    conv = layers.Conv2D(256, 3, 1, 'same', activation='relu', name=name+'_conv1')(input)
    conv = layers.Conv2D(256, 3, 1, 'same', activation='relu', name=name+'_conv2')(conv)
    conv = layers.Conv2D(256, 3, 1, 'same', activation='relu', name=name+'_conv3')(conv)
    out = layers.Conv2D(out_channels, 3, 1, 'same', name=name+'output')(conv)
    if flatten is True:
        batch_size = tf.shape(out)[0]
        out = tf.reshape(out, [batch_size, -1, num_anchors, self.num_classes+4])
        out = tf.reshape(out, [batch_size, -1, self.num_classes+4])
    return out
I want to know how to reuse these layers the way tf.variable_scope(reuse=tf.AUTO_REUSE) did in TensorFlow 1.
In TensorFlow 1:
with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope:
    # all layers here could be auto-reused
You can reuse layers simply by holding a common reference to them. I have attached sample code below. It uses a variable named common_layer that is shared across three separate models (two Sequential, one functional). The first model is trained, and afterwards we compare the weights of common_layer as seen from all three models: the differences are zero, which shows that the updates made through the first model are reflected in the other models' shared layer.
import tensorflow as tf
import numpy as np

common_layer = tf.keras.layers.Dense(100, name='common_layer')

model1 = tf.keras.models.Sequential([
    tf.keras.layers.Input((100,)),
    common_layer,
    tf.keras.layers.Dense(1)
])
model2 = tf.keras.models.Sequential([
    tf.keras.layers.Input((100,)),
    common_layer,
    tf.keras.layers.Dense(10)
])

input_layer = tf.keras.layers.Input((100,))
output_layer = common_layer(input_layer)
output_layer = tf.keras.layers.Dense(20)(output_layer)
model3 = tf.keras.Model(inputs=[input_layer], outputs=[output_layer])

model1.compile('adam', loss='mse')
model1.fit(np.random.rand(128, 100), np.random.rand(128, 1))

weights1 = model1.get_weights()[0]
weights2 = model2.get_weights()[0]
weights3 = model3.get_weights()[0]
print(np.sum(weights1 - weights2)) # 0.0
print(np.sum(weights1 - weights3)) # 0.0
My model:
model = Sequential()
model.add( LSTM(25, batch_input_shape = (None, None, 19), return_sequences = True ) )
model.add(Dense(4, activation = 'tanh'))
model.compile(loss='mean_squared_error', optimizer ='adam', metrics = ['accuracy'])
Some examples of input data shapes:
input_list[0].shape = (7,19)
input_list[1].shape = (8,19)
input_list[2].shape = (17,19)
Some examples of output data shapes:
output_list[0].shape = (7,4)
output_list[1].shape = (8,4)
output_list[2].shape = (17,4)
input_list.shape = (233,)
output_list.shape = (233,)
The error occurs when running:
d_loss = model.fit(input_list,output_list,validation_split=0.33,nb_epoch=100,verbose=1,shuffle=True, batch_size = 1)
error: ValueError: Error when checking input: expected lstm_22_input to have 3 dimensions, but got array with shape (233, 1)
Just increase the dimensions with np.expand_dims(x, axis=0); each sample then becomes three-dimensional, (1, timesteps, 19), which is what the LSTM expects.
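A minimal sketch of one way to apply this when every sample has a different length (the per-sample loop and the train_on_batch call are my own illustration, not part of the original answer):
import numpy as np

# Each x has shape (timesteps, 19) and each y has shape (timesteps, 4);
# expand_dims adds the batch axis so the LSTM sees (1, timesteps, 19).
for x, y in zip(input_list, output_list):
    x_batch = np.expand_dims(x, axis=0)
    y_batch = np.expand_dims(y, axis=0)
    model.train_on_batch(x_batch, y_batch)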
I have built a CNN model using the principle of "Model Subclassing" in Keras. Here is the class which represents my model:
class ConvNet(tf.keras.Model):
    def __init__(self, data_format, classes):
        super(ConvNet, self).__init__()
        if data_format == "channels_first":
            axis = 1
        elif data_format == "channels_last":
            axis = -1
        self.conv_layer1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=(1, 1),
                                                  padding="same", activation="relu")
        self.pool_layer1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
        self.conv_layer2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=(1, 1),
                                                  padding="same", activation="relu")
        self.pool_layer2 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
        self.conv_layer3 = tf.keras.layers.Conv2D(filters=128, kernel_size=5, strides=(1, 1),
                                                  padding="same", activation="relu")
        self.pool_layer3 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1),
                                                        padding="same")
        self.flatten = tf.keras.layers.Flatten()
        self.dense_layer1 = tf.keras.layers.Dense(units=512, activation="relu")
        self.dense_layer2 = tf.keras.layers.Dense(units=classes, activation="softmax")

    def call(self, inputs, training=True):
        output_tensor = self.conv_layer1(inputs)
        output_tensor = self.pool_layer1(output_tensor)
        output_tensor = self.conv_layer2(output_tensor)
        output_tensor = self.pool_layer2(output_tensor)
        output_tensor = self.conv_layer3(output_tensor)
        output_tensor = self.pool_layer3(output_tensor)
        output_tensor = self.flatten(output_tensor)
        output_tensor = self.dense_layer1(output_tensor)
        return self.dense_layer2(output_tensor)
I would like to know how to train it "eagerly", by which I mean avoiding the use of the compile and fit methods.
I am not sure how exactly to construct the training loop. I understand that I must use tf.GradientTape.gradient() to calculate the gradients and then optimizer.apply_gradients() to update my model parameters.
What I do not understand is how I can make predictions with my model in order to get logits and then use them to calculate the loss. If someone could help me with the idea of how to construct the training loop, I would really appreciate it.
Eager execution is an imperative programming mode that lets developers follow Python's natural control flow. Essentially, you no longer need to first create placeholders and computational graphs and then execute them in TensorFlow sessions. You can use automatic differentiation to compute gradients in your training loop:
for i in range(iterations):
    with tf.GradientTape() as tape:
        logits = model(batch_examples, training=True)
        loss = tf.losses.sparse_softmax_cross_entropy(batch_labels, logits)
    grads = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))
This assumes that the model is an instance of the Keras Model class. I hope this solves your problem! You should also check out the TensorFlow guide on eager execution.
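For a more complete picture, here is a self-contained sketch of such a training loop written against the TF 2.x API, using the ConvNet class from the question. The MNIST data, batch size, and the from_logits=False loss are my own example choices (the model already ends in a softmax):
import tensorflow as tf

model = ConvNet(data_format="channels_last", classes=10)
optimizer = tf.keras.optimizers.Adam(1e-3)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)  # softmax output, not raw logits

# Example data: MNIST, batched with tf.data.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None].astype("float32") / 255.0
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(1024).batch(32)

for epoch in range(2):
    for batch_examples, batch_labels in dataset:
        with tf.GradientTape() as tape:
            probs = model(batch_examples, training=True)  # forward pass: class probabilities
            loss = loss_fn(batch_labels, probs)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
    print(f"epoch {epoch}: last batch loss = {float(loss):.4f}")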