Why is Tensorflow "save_model" failing?

I'm working in Tensorflow 2.0.0 and trying to save a model. Here's the code I'm using (thanks to @m-innat for suggesting to simplify the example model):
class SimpleModel( tf.keras.Model ):

    def __init__( self, **kwargs ):
        super( SimpleModel, self ).__init__( **kwargs )
        self.conv = tf.keras.layers.Conv1D( filters = 5, kernel_size = 3, padding = "SAME" )
        self.dense = tf.keras.layers.Dense( 1 )

    def call( self, x ):
        x = self.conv( x )
        x = self.dense( x )
        return x

simple_model = SimpleModel()
input_shape = ( 3, 4, 5 )
x = tf.random.normal( shape = input_shape )
y = tf.random.normal( shape = ( 3, 4, 1 ) )
y_pred = simple_model( x )
print( "y_pred", y_pred )
tf.keras.models.save_model( simple_model,
                            "/content/gdrive/MyDrive/SimpleModel.tf", save_format = "tf" )
However, the save_model call gives an error:
AttributeError: 'NoneType' object has no attribute 'shape'
Nothing in the call stack suggests what the underlying problem is. Can you please help?

The error is related to the fact that the input shapes of the layers have not been set. You can set them by calling simple_model.fit or simple_model.predict once before saving.
For example, in your code, you can call y_pred = simple_model.predict( x ).
With this change the model is saved correctly, as I verified with the code below.
import tensorflow as tf

class SimpleModel( tf.keras.Model ):

    def __init__( self, **kwargs ):
        super( SimpleModel, self ).__init__( **kwargs )
        self.conv = tf.keras.layers.Conv1D( filters = 5, kernel_size = 3, padding = "SAME" )
        self.dense = tf.keras.layers.Dense( 1 )

    def call( self, x ):
        x = self.conv( x )
        x = self.dense( x )
        return x

simple_model = SimpleModel()
input_shape = ( 3, 4, 5 )
x = tf.random.normal( shape = input_shape )
y = tf.random.normal( shape = ( 3, 4, 1 ) )
y_pred = simple_model.predict( x )
print( "y_pred", y_pred )
tf.keras.models.save_model( simple_model,
                            "/content/gdrive/MyDrive/SimpleModel.tf", save_format = "tf" )
# Output:
# y_pred [[[-0.4533468 ]
#   [ 1.3261242 ]
#   [-1.0296338 ]
#   [-1.1136482 ]] ...

model = tf.keras.models.load_model('/content/gdrive/MyDrive/SimpleModel.tf')
model.predict(x)
# Output:
# array([[[-0.4533468 ],
#   [ 1.3261242 ],
#   [-1.0296338 ],
#   [-1.1136482 ]], ...
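Alternatively (a sketch, not part of the original answer), you should be able to create the layer weights without running a forward pass by declaring the input shape explicitly with model.build. Running one prediction as above remains the most reliable route for subclassed models, but the following may be enough on recent TF versions; the local path is illustrative:

import tensorflow as tf

simple_model = SimpleModel()
# (None, 4, 5): any batch size, sequence length 4, 5 input channels.
simple_model.build( input_shape = ( None, 4, 5 ) )
simple_model.summary()  # the weights now exist
tf.keras.models.save_model( simple_model, "SimpleModel.tf", save_format = "tf" )  # illustrative local path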

Related

the param of attention layer is 0

When I build multi_head_self_attention, I found that the number of parameters of this layer is 0. What is wrong with this attention layer, and what should I do to fix it?
I initialize query, key and value in __init__, and the attention function computes the output from query/key/value.
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense

class MultiHeadSelfAttention(Layer):
    def __init__(self, embed_dim, num_heads):  # num_heads is the number of attention heads
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = Dense(embed_dim, use_bias=False)
        self.key_dense = Dense(embed_dim, use_bias=False)
        self.value_dense = Dense(embed_dim, use_bias=False)
        self.combine_heads = Dense(embed_dim, use_bias=False)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        # inputs.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(concat_attention)  # (batch_size, seq_len, embed_dim)
        return output

x = MultiHeadSelfAttention(embed_dim, num_heads)(embed_input)
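A quick way to check whether the Dense sub-layers actually hold weights is to build the layer on a dummy batch and count its parameters. This is a hedged diagnostic sketch; embed_dim = 64, num_heads = 8 and the input shape are illustrative, not from the original post:

import tensorflow as tf

layer = MultiHeadSelfAttention(embed_dim=64, num_heads=8)
dummy = tf.random.normal(shape=(2, 10, 64))   # (batch_size, seq_len, embed_dim)
_ = layer(dummy)                              # calling the layer once builds the Dense weights
print(layer.count_params())                   # 4 * 64 * 64 = 16384 once the layer is built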

Mobilevit Binary classification ValueError: `logits` and `labels` must have the same shape, received ((None, 2) vs (None, 1))

I am using the colab notebook (https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/mobilevit.ipynb) for MobileViT to train on a dataset I have of 25k pictures for 2 classes. Since it's a binary classification problem, I have used keras.losses.BinaryCrossentropy and sigmoid as the activation function at the last layer:
def create_mobilevit(num_classes=2):
    inputs = keras.Input((image_size, image_size, 3))
    x = layers.Rescaling(scale=1.0 / 255)(inputs)

    # Initial conv-stem -> MV2 block.
    x = conv_block(x, filters=16)
    x = inverted_residual_block(
        x, expanded_channels=16 * expansion_factor, output_channels=16
    )

    # Downsampling with MV2 block.
    x = inverted_residual_block(
        x, expanded_channels=16 * expansion_factor, output_channels=24, strides=2
    )
    x = inverted_residual_block(
        x, expanded_channels=24 * expansion_factor, output_channels=24
    )
    x = inverted_residual_block(
        x, expanded_channels=24 * expansion_factor, output_channels=24
    )

    # First MV2 -> MobileViT block.
    x = inverted_residual_block(
        x, expanded_channels=24 * expansion_factor, output_channels=48, strides=2
    )
    x = mobilevit_block(x, num_blocks=2, projection_dim=64)

    # Second MV2 -> MobileViT block.
    x = inverted_residual_block(
        x, expanded_channels=64 * expansion_factor, output_channels=64, strides=2
    )
    x = mobilevit_block(x, num_blocks=4, projection_dim=80)

    # Third MV2 -> MobileViT block.
    x = inverted_residual_block(
        x, expanded_channels=80 * expansion_factor, output_channels=80, strides=2
    )
    x = mobilevit_block(x, num_blocks=3, projection_dim=96)
    x = conv_block(x, filters=320, kernel_size=1, strides=1)

    # Classification head.
    x = layers.GlobalAvgPool2D()(x)
    outputs = layers.Dense(num_classes, activation="sigmoid")(x)
    return keras.Model(inputs, outputs)
And here's my dataset preparation cell:-
batch_size = 64
auto = tf.data.AUTOTUNE
resize_bigger = 512
num_classes = 2

def preprocess_dataset(is_training=True):
    def _pp(image, label):
        if is_training:
            # Resize to a bigger spatial resolution and take the random crops.
            image = tf.image.resize(image, (resize_bigger, resize_bigger))
            image = tf.image.random_crop(image, (image_size, image_size, 3))
            image = tf.image.random_flip_left_right(image)
        else:
            image = tf.image.resize(image, (image_size, image_size))
        label = tf.one_hot(label, depth=num_classes)
        return image, label
    return _pp

def prepare_dataset(dataset, is_training=True):
    if is_training:
        dataset = dataset.shuffle(batch_size * 10)
    dataset = dataset.map(preprocess_dataset(is_training), num_parallel_calls=auto)
    return dataset.batch(batch_size).prefetch(auto)
And this is the cell for training the model:-
learning_rate = 0.002
label_smoothing_factor = 0.1
epochs = 30

optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
loss_fn = keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing_factor)

def run_experiment(epochs=epochs):
    mobilevit_xxs = create_mobilevit(num_classes=num_classes)
    mobilevit_xxs.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])

    checkpoint_filepath = "/tmp/checkpoint"
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
    )

    mobilevit_xxs.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[checkpoint_callback],
    )
    mobilevit_xxs.load_weights(checkpoint_filepath)
    _, accuracy = mobilevit_xxs.evaluate(val_ds)
    print(f"Validation accuracy: {round(accuracy * 100, 2)}%")
    return mobilevit_xxs

mobilevit_xxs = run_experiment()
Basically the code is identical to https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/mobilevit.ipynb except for the change to BinaryCrossentropy loss and sigmoid as the activation function. I don't understand why I am getting this error even though I explicitly one-hot-encoded my class labels:
ValueError: `logits` and `labels` must have the same shape, received ((None, 2) vs (None, 1)).
You need to change num_classes = 2 to num_classes = 1, because you are using a sigmoid activation function, which outputs a single value between 0 and 1 for binary classification.
Values below 0.5 are treated as class 0 and values above 0.5 as class 1.
Please refer to the replicated gist for your reference.
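In other words, the model head, the loss and the label shape have to agree. Two consistent setups are sketched below; build_head is a hypothetical helper, and the feature tensor and the rest of the pipeline are assumed from the question:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def build_head(features, binary_head=True):
    """Attach a classification head whose output shape matches the loss/label format."""
    if binary_head:
        # Option A: one sigmoid unit + BinaryCrossentropy; labels must have shape (None, 1),
        # i.e. drop the tf.one_hot call in the preprocessing function.
        outputs = layers.Dense(1, activation="sigmoid")(features)
        loss_fn = keras.losses.BinaryCrossentropy(label_smoothing=0.1)
    else:
        # Option B: keep two units and one-hot labels, but use softmax + CategoricalCrossentropy.
        outputs = layers.Dense(2, activation="softmax")(features)
        loss_fn = keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
    return outputs, loss_fn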

Why is the value of a variable inside a tensorflow graph not frozen?

I'm trying to save and load the graph of a tensorflow module that contains a tf.Variable as an internal variable.
Here is the code:
import tensorflow as tf

class MyModule(tf.Module):
    def __init__(self, v):
        self.v = v
        pass

    @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.int32), tf.TensorSpec(shape=None, dtype=tf.int32)])
    def __call__(self, x, v):
        self.v.assign( x * v, read_value=False )
        return self.v

x = tf.constant( tf.random.uniform(shape=[2,1], maxval=3, dtype=tf.int32) )
v = tf.Variable([[1], [2]])
module = MyModule(v)

#############################################

x = tf.constant( tf.random.uniform(shape=[3,1], maxval=3, dtype=tf.int32) )
v = tf.Variable([[1], [2], [3]])
module = MyModule(v)

tf.saved_model.save(module, "module")
imported = tf.saved_model.load("module")

x = tf.constant([80,0,20,24,321])
v = tf.Variable(3*tf.ones_like(x), trainable=False)
result = imported(x,v)
print(result)
The output is this:
tf.Tensor([240 0 60 72 963], shape=(5,), dtype=int32)
My question is the following: given that the graph has been saved, why can the value of the variable self.v still be changed? Isn't it supposed to be frozen?
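For context, tf.saved_model.save stores tf.Variable objects as variables rather than baking them into the graph as constants, so the restored object can still mutate them; freezing a graph into constants is a separate conversion step. A small hedged check of this behaviour (the "module" path matches the code above, the assigned values are illustrative):

import tensorflow as tf

imported = tf.saved_model.load("module")
print(type(imported.v))              # the tracked attribute is restored as a tf.Variable, not a constant
imported.v.assign([[7], [8], [9]])   # the variable is still assignable after loading
print(imported.v.numpy())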

Bad tensor shape when using tensorflow drop_remainder

I am trying to create a tf.data.Dataset from a generator. I want to make sure all of my batches have the exact same size, so I'm calling .batch(batch_size, drop_remainder=True) on my Dataset. Here's the relevant code:
train_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, train_generator),
    (tf.float32, tf.float32),
    (
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
    ),
)

val_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, val_generator),
    (tf.float32, tf.float32),
    (
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
        tf.TensorShape([batch_size, crop_height, crop_width, 3]),
    ),
)

my_train_data = train_data.batch(batch_size, drop_remainder=True)
my_val_data = val_data.batch(batch_size, drop_remainder=True)
But I get this error when I run it:
tensorflow.python.framework.errors_impl.InvalidArgumentError: input must be 4-dimensional[4,4,64,64,48] [Op:FusedBatchNormV3]
I get this error because I'm batching the data twice (batch_size is 4 in my error message). I tried to replace the batch_size with None in the .from_generator command, but I get the same error. If I remove the first argument completely like so:
(tf.TensorShape([options["crop_height"], options["crop_width"], 3]),
tf.TensorShape([options["crop_height"], options["crop_width"], 3]),
)
I get this error:
ValueError: `generator` yielded an element of shape (4, 128, 128, 3) where an element of shape (128, 128, 3) was expected.
How can I use drop_remainder without batching the data twice?
EDIT:
Adding code associated with generators:
class BaseGenerator(Sequence):
    def __init__(
        self,
        image_filenames,
        label_filenames,
        batch_size=1,
        is_train=True,
        preprocess=None,
        augment=None,
        height=128,
        width=128,
        shuffle=False,
    ):
        self.indices = np.arange(0, len(image_filenames))
        self.image_filenames = np.array(image_filenames)
        self.label_filenames = np.array(label_filenames)
        self.batch_size = batch_size
        self.is_train = is_train
        self.preprocess = preprocess
        self.augment = augment
        self.crop_height = height
        self.crop_width = width
        self.shuffle = shuffle
        self.on_epoch_end()  # shuffle data

    def __len__(self):
        return int(np.ceil(len(self.indices) / float(self.batch_size)))

    def __getitem__(self, index):
        min_index = index * self.batch_size
        max_index = min((index + 1) * self.batch_size, len(self.indices))
        batch_indices = self.indices[min_index:max_index]
        return self.generate(self.image_filenames[batch_indices], self.label_filenames[batch_indices])

    def __call__(self):
        return next(iter(self))

    def on_epoch_end(self):
        if self.is_train and self.shuffle:
            np.random.shuffle(self.indices)

    def generate(self, image_filenames, label_filenames):
        X = np.zeros((self.batch_size, self.crop_height, self.crop_width, 3), dtype=np.float32)
        y = np.zeros((self.batch_size, self.crop_height, self.crop_width), dtype=np.float32)
        for i, (image_fn, label_fn) in enumerate(zip(image_filenames, label_filenames)):
            image = utils.load_image(image_fn)
            label = utils.load_image(label_fn)
            if self.augment:
                augmented = self.augment(image=image, mask=label)
                image = augmented["image"]
                label = augmented["mask"]
            if self.preprocess:
                image = self.preprocess(image)
            label = np.float32(helpers.one_hot_it(label=label))
            X[i, :, :, :] = image
            y[i, :, :, :] = label
        return X, y

train_generator = BaseGenerator(
    image_filenames=train_input_names,
    label_filenames=train_output_names,
    batch_size=batch_size,
    is_train=True,
    preprocess=preprocessing,
    augment=None,
    height=128,
    width=128,
)

val_generator = BaseGenerator(
    image_filenames=val_input_names,
    label_filenames=val_output_names,
    batch_size=batch_size,
    is_train=False,
    preprocess=preprocessing,
    augment=None,
    height=128,
    width=128,
)
As you mentioned in the question, the issue is that you are batching your data twice. To overcome this problem, you can:
First, define a generator that yields single images (e.g. without batch dimension).
Then, group your examples into batches using the method batch of tf.data.Dataset.
To redefine BaseGenerator so that it yields single images, you can follow the steps below.
First, in the __init__ method, remove batch_size because it is no longer needed:
def __init__(
    self,
    image_filenames,
    label_filenames,
    is_train=True,
    preprocess=None,
    augment=None,
    height=128,
    width=128,
    shuffle=False,
):
    self.indices = np.arange(0, len(image_filenames))
    self.image_filenames = np.array(image_filenames)
    self.label_filenames = np.array(label_filenames)
    self.is_train = is_train
    self.preprocess = preprocess
    self.augment = augment
    self.crop_height = height
    self.crop_width = width
    self.shuffle = shuffle
    self.on_epoch_end()  # shuffle data
Second, adapt the method generate so that it yields a single example:
def generate(self, image_filename, label_filename):
    image = utils.load_image(image_filename)
    label = utils.load_label(label_filename)
    if self.augment:
        augmented = self.augment(image=image, mask=label)
        image = augmented["image"]
        label = augmented["mask"]
    if self.preprocess:
        image = self.preprocess(image)
    label = np.float32(helpers.one_hot_it(label=label))
    X = image  # Shape=(self.crop_height, self.crop_width, 3)
    Y = label  # Shape=(self.crop_height, self.crop_width)
    return X, Y
Third, in the method __getitem__, pass only one filename:
def __getitem__(self, index):
    return self.generate(self.image_filenames[index], self.label_filenames[index])
Finally, exclude the batch dimension when defining your tf.data.Dataset:
train_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, train_generator),
    (tf.float32, tf.float32),
    (
        tf.TensorShape([crop_height, crop_width, 3]),
        tf.TensorShape([crop_height, crop_width]),
    ),
)

my_train_data = train_data.batch(batch_size, drop_remainder=True)

it = iter(my_train_data)
x, y = next(it)
print(x.shape)  # (4, 128, 128, 3)
print(y.shape)  # (4, 128, 128)
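As a side note (not part of the original answer), in TF 2.4+ the output types and shapes can be passed together through the output_signature argument, which tends to be less error-prone. A minimal sketch, assuming the same unbatched generator and batch_size from the question; the crop size of 128 is illustrative:

import tensorflow as tf

crop_height = crop_width = 128  # illustrative values

train_data = tf.data.Dataset.from_generator(
    lambda: map(tuple, train_generator),
    output_signature=(
        tf.TensorSpec(shape=(crop_height, crop_width, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(crop_height, crop_width), dtype=tf.float32),
    ),
)
my_train_data = train_data.batch(batch_size, drop_remainder=True)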

Undefined output shape of custom Keras layer

I am writing a custom Keras layer that flattens all except the last dimension of the input. However, when feeding the output of the layer into the next layer an error occurs because the output shape of the layer is None in all dimensions.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class FlattenLayers( Layer ):
    """
    Takes a nD tensor flattening the middle dimensions, ignoring the edge dimensions.
    [ n, x, y, z ] -> [ n, xy, z ]
    """

    def __init__( self, **kwargs ):
        super( FlattenLayers, self ).__init__( **kwargs )

    def build( self, input_shape ):
        super( FlattenLayers, self ).build( input_shape )

    def call( self, inputs ):
        input_shape = tf.shape( inputs )
        flat = tf.reshape(
            inputs,
            tf.stack( [
                -1,
                K.prod( input_shape[ 1 : -1 ] ),
                input_shape[ -1 ]
            ] )
        )
        return flat

    def compute_output_shape( self, input_shape ):
        if not all( input_shape[ 1: ] ):
            raise ValueError( 'The shape of the input to "Flatten" '
                              'is not fully defined '
                              '(got ' + str( input_shape[ 1: ] ) + '). '
                              'Make sure to pass a complete "input_shape" '
                              'or "batch_input_shape" argument to the first '
                              'layer in your model.' )
        output_shape = (
            input_shape[ 0 ],
            np.prod( input_shape[ 1 : -1 ] ),
            input_shape[ -1 ]
        )
        return output_shape
For example, when a Dense layer follows I receive the error ValueError: The last dimension of the inputs to Dense should be defined. Found None.
Why do you have tf.stack() in new shape? You want to flatten all dimensions except the last one; this is how you could do it:
import tensorflow as tf
from tensorflow.keras.layers import Layer
import numpy as np

class FlattenLayer(Layer):
    def __init__(self, **kwargs):
        super(FlattenLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        super(FlattenLayer, self).build(input_shape)

    def call(self, inputs):
        new_shape = self.compute_output_shape(tf.shape(inputs))
        return tf.reshape(inputs, new_shape)

    def compute_output_shape(self, input_shape):
        new_shape = (input_shape[0] * input_shape[1] * input_shape[2],
                     input_shape[3])
        return new_shape
Testing with a single data point (tf.__version__=='1.13.1'):
inputs = tf.keras.layers.Input(shape=(10, 10, 1))
res = tf.keras.layers.Conv2D(filters=3, kernel_size=2)(inputs)
res = FlattenLayer()(res)
model = tf.keras.models.Model(inputs=inputs, outputs=res)
x_data = np.random.normal(size=(1, 10, 10, 1))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    evaled = model.outputs[0].eval({model.inputs[0]: x_data})
    print(evaled.shape)  # (81, 3)
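Note that the answer above merges the batch dimension into the flattened axis and relies on TF 1.x sessions. If you want the [ n, x, y, z ] -> [ n, xy, z ] behaviour described in the question in TF 2.x, a hedged sketch (FlattenMiddle is an illustrative name; it assumes the non-batch dimensions are statically known) could look like this:

import tensorflow as tf

class FlattenMiddle(tf.keras.layers.Layer):
    """Flattens every dimension except the first (batch) and the last: [n, x, y, z] -> [n, x*y, z]."""

    def call(self, inputs):
        static_shape = inputs.shape  # batch dimension may be None, the rest must be known
        # Keeping the last dimension static is what keeps a following Dense layer happy.
        return tf.reshape(inputs, (-1, static_shape[1] * static_shape[2], static_shape[-1]))

inputs = tf.keras.layers.Input(shape=(10, 10, 1))
x = tf.keras.layers.Conv2D(filters=3, kernel_size=2)(inputs)  # (None, 9, 9, 3)
x = FlattenMiddle()(x)                                        # (None, 81, 3)
outputs = tf.keras.layers.Dense(4)(x)                         # last dimension is defined, so Dense works
model = tf.keras.models.Model(inputs, outputs)
model.summary()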