How to deal with incompatible shapes while trying to fit a model in Tensorflow Keras - tensorflow

So, I have done some modifications with the VGG16 neural network to make a linear-regression and classification model.
And at last I am writing the following code to compile and fit my data into the model to initiate training->
class Facetracker(Model):
# The initialization function-
def __init__(self,eyetracker,**kwargs):
super().__init__(**kwargs)
self.model = eyetracker #Instantiating the model
def compile(self,opt,classlosss,localization_loss,**kwargs):
super().compile(**kwargs)
self.classloss = class_loss
self.localization_loss = regress_loss
self.opt = optimizer
# Defining the training step
def train_step(self,batch,**kwargs):
X,y = batch #unpacking our data
with tf.GradientTape() as tape:
classes,coords = self.model(X,training=True)
batch_classloss = self.classloss(y[0],classes)
batch_localloss = self.localization_loss(tf.cast(y[1],tf.float32),coords)
# calculating total loss-
total_loss = batch_localloss+0.5*batch_classloss
grad = tape.gradient(total_loss,self.model.trainable_variables)
optimizer.apply_gradients(zip(grad,self.model.trainable_variables))
return{
"total_loss":total_loss,
"class_loss":batch_classloss,
"localilzation_loss":batch_localloss
}
def test_step(self,batch):
X,y = batch
classes,coords = self.model(X,training=False)
batch_classloss = self.classloss(y[0],classes)
batch_localloss = self.localization_loss(tf.cast(y[1],tf.float32),coords)
total_loss = batch_localloss+0.5*batch_classloss
return{
"total_loss": total_loss,
"class_loss": batch_classloss,
"localilzation_loss": batch_localloss
}
# def call(self, X, **kwargs):
# return self.model(X,**kwargs)
# Replacing the call function with a lambda function
lambda self,X,**kwargs: self.model(X,**kwargs)
# Subclassing our model-
print("Subclassing.....")
model = Facetracker(facetracker)
print("Compiling......")
model.compile(optimizer,classlosss=class_loss,localization_loss=localization_loss)
# Preparing the log directory
logdir="logdir"
tensorboard_callbacks = tf.keras.callbacks.TensorBoard(log_dir=logdir)
print("Fitting the model")
hist = model.fit(train.take(80),
epochs=16,
initial_epoch =8,
validation_data=val,
validation_steps =8,
validation_freq=2,
callbacks = [[tensorboard_callbacks]])
This includes the class prepared for training the model and the last few lines are subclassing the model and fitting the prepared data into the model.
The error I now get seems pretty hefty to me and it goes like this->
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\main.py", line 535, in <module>
hist = model.fit(train.take(80),
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\tensorflow\python\eager\execute.py", line 54, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
Detected at node 'gradient_tape/sub_2/BroadcastGradientArgs' defined at (most recent call last):
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\main.py", line 535, in <module>
hist = model.fit(train.take(80),
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\keras\engine\training.py", line 1409, in fit
tmp_logs = self.train_function(iterator)
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\keras\engine\training.py", line 1051, in train_function
return step_function(self, iterator)
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\keras\engine\training.py", line 1040, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\MarkATT\lib\site-packages\keras\engine\training.py", line 1030, in run_step
outputs = model.train_step(data)
File "C:\Users\Radhe Krishna\OneDrive\Documents\MarkATT\main.py", line 498, in train_step
grad = tape.gradient(total_loss,self.model.trainable_variables)
Node: 'gradient_tape/sub_2/BroadcastGradientArgs'
Incompatible shapes: [2,4] vs. [8]
[[{{node gradient_tape/sub_2/BroadcastGradientArgs}}]] [Op:__inference_train_function_22026]
It would be a great help if someone could examine this problem and help me get out of it. I have been quite struggling with it now.
Thanks in advance!!!
I has planned to initiate training with the code provided, but I firstly received an error for the call function and as I resolved it by converting it into lambda function- This issue of incompatible shapes came up. I tries to adjust the integers in the input data and batch related fields but nothing worked.

Related

How can I integrate Optuna with Deepspeech training?

I'm trying to integrate Optuna with DeepSpeech in order to optimise some of its hyperparameters. I'm sticking to learning rate for now, just to get a feel for how Optuna works, but I've hit a roadblock and need some help.
I have a function hps_train which is what does the training step. It takes the Optuna trial object as the argument and returns the dev loss, which is what I want to use Optuna to minimise. This is the exact same function as train() in training/deepspeech_training/train.py, but with a few modifications:
def hps_train(trial):
#.
#.Same as train() in https://github.com/mozilla/DeepSpeech/blob/master/training/deepspeech_training/train.py
#.
if FLAGS.horovod:
# Effective batch size in synchronous distributed training is scaled by the number of workers. An increase in learning rate compensates for the increased batch size.
optimizer = hps_create_optimizer(learning_rate_var * hvd.size())
optimizer = hvd.DistributedOptimizer(optimizer)
else:
optimizer, learning_rate_var = hps_create_optimizer(trial)
reduce_learning_rate_op = learning_rate_var.assign(
tf.multiply(learning_rate_var, FLAGS.plateau_reduction)
)
#.
#.Same as train() https://github.com/mozilla/DeepSpeech/blob/master/training/deepspeech_training/train.py
#.
with tfv1.Session(config=Config.session_config) as session:
#.
#.Same as train() https://github.com/mozilla/DeepSpeech/blob/master/training/deepspeech_training/train.py
#.
final_dev_loss = dev_losses[-1]
log_debug("Session closed.")
return final_dev_loss
I also have some helper functions:
def hps_create_optimizer(trial):
learning_rate = trial.suggest_float("adam_lr", 1e-5, 1e-1, log=True)
with tf.variable_scope("learning_rate", reuse=tf.AUTO_REUSE):
learning_rate_var = tfv1.get_variable(
"learning_rate", initializer=learning_rate, trainable=False
)
optimizer = tfv1.train.AdamOptimizer(
learning_rate=learning_rate_var, beta1=0.9, beta2=0.999, epsilon=1e-08
)
return optimizer, learning_rate_var
def new_trial_callback(study, trial):
chkpt_path = setup_dirs(study.study_name, trial.number + 1)
FLAGS.checkpoint_dir = chkpt_path
FLAGS.save_checkpoint_dir = chkpt_path
FLAGS.load_checkpoint_dir = chkpt_path
def objective(trial, session):
if FLAGS.train_files:
val_loss = hps_train(trial, session)
return float(val_loss)
def objective_tf(trial):
tfv1.reset_default_graph()
with tfv1.Graph().as_default():
return objective(trial, session)
Putting it all together:
def main(_):
initialize_globals()
early_training_checks()
lr_study = optuna.create_study(study_name="lr_study", direction='minimize')
chkpt_dir = setup_dirs(lr_study.study_name, 0)
FLAGS.checkpoint_dir = chkpt_dir
FLAGS.save_checkpoint_dir = chkpt_dir
FLAGS.load_checkpoint_dir = chkpt_dir
lr_study.optimize(objective_tf, n_trials=25, callbacks=[new_trial_callback])
When I run this code, the first run completes normally. However, when it tries to start the second one, I get an error:
$ python training/hparam_search.py --train_files ~/datasets/cv-corpus-1/en/clips/train.csv --dev_files ~/datasets/cv-corpus-1/en/clips/dev.csv --test_files ~/datasets/cv-corpus-1/en/clips/test.csv --train_batch_size 64 --test_batch_size 64 --dev_batch_size 64 --n_hidden 512 --epochs 1 --train_cudnn --use_allow_growth --checkpoint_dir checkpoints
[I 2021-08-30 15:06:16,637] A new study created in memory with name: lr_study
I Could not find best validating checkpoint.
I Could not find most recent checkpoint.
I Initializing all variables.
I STARTING Optimization
Epoch 0 | Training | Elapsed Time: 0:00:17 | Steps: 187 | Loss: 252.374135
Epoch 0 | Validation | Elapsed Time: 0:00:12 | Steps: 109 | Loss: 255.176724 | Dataset: /home/user/datasets/cv-corpus-1/en/clips/dev.csv
I Saved new best validating model with loss 255.176724 to: checkpoints/optuna_trials/lr_study/0/best_dev-187
--------------------------------------------------------------------------------
I FINISHED optimization in 0:00:30.553797
[I 2021-08-30 15:06:50,101] Trial 0 finished with value: 255.1767243551552 and parameters: {'adam_lr': 0.006636434104761772}. Best is trial 0 with value: 255.1767243551552.
[W 2021-08-30 15:06:50,229] Trial 1 failed because of the following error: ValueError('in converted code:\n relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core:\n\n contrib/cudnn_rnn/python/layers/cudnn_rnn.py:440 call\n training)\n contrib/cudnn_rnn/python/layers/cudnn_rnn.py:518 _forward\n seed=self._seed)\n contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:1132 _cudnn_rnn\n outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(**args)\n python/ops/gen_cudnn_rnn_ops.py:2051 cudnn_rnnv3\n time_major=time_major, name=name)\n python/framework/op_def_library.py:367 _apply_op_helper\n g = ops._get_graph_from_inputs(_Flatten(keywords.values()))\n python/framework/ops.py:5979 _get_graph_from_inputs\n _assert_same_graph(original_graph_element, graph_element)\n python/framework/ops.py:5914 _assert_same_graph\n (item, original_item))\n\n ValueError: Tensor("cudnn_lstm/opaque_kernel:0", dtype=float32_ref, device=/device:GPU:0) must be from the same graph as Tensor("tower_0/Reshape_2:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:0).\n',)
Traceback (most recent call last):
File "/home/user/.local/lib/python3.6/site-packages/optuna/study/_optimize.py", line 213, in _run_trial
value_or_values = func(trial)
File "training/hparam_search.py", line 671, in objective_tf
return objective(trial)
File "training/hparam_search.py", line 660, in objective
val_loss = hps_train(trial)
File "training/hparam_search.py", line 332, in hps_train
iterator, optimizer, dropout_rates
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 317, in get_tower_results
avg_loss, non_finite_files = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, reuse=i > 0)
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 244, in calculate_mean_edit_distance_and_loss
logits, _ = create_model(batch_x, batch_seq_len, dropout, reuse=reuse, rnn_impl=rnn_impl)
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 195, in create_model
output, output_state = rnn_impl(layer_3, seq_length, previous_state, reuse)
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 133, in rnn_impl_cudnn_rnn
sequence_lengths=seq_length)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/base.py", line 548, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py", line 237, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in converted code:
relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core:
contrib/cudnn_rnn/python/layers/cudnn_rnn.py:440 call
training)
contrib/cudnn_rnn/python/layers/cudnn_rnn.py:518 _forward
seed=self._seed)
contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:1132 _cudnn_rnn
outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(**args)
python/ops/gen_cudnn_rnn_ops.py:2051 cudnn_rnnv3
time_major=time_major, name=name)
python/framework/op_def_library.py:367 _apply_op_helper
g = ops._get_graph_from_inputs(_Flatten(keywords.values()))
python/framework/ops.py:5979 _get_graph_from_inputs
_assert_same_graph(original_graph_element, graph_element)
python/framework/ops.py:5914 _assert_same_graph
(item, original_item))
ValueError: Tensor("cudnn_lstm/opaque_kernel:0", dtype=float32_ref, device=/device:GPU:0) must be from the same graph as Tensor("tower_0/Reshape_2:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:0).
Traceback (most recent call last):
File "training/hparam_search.py", line 691, in <module>
absl.app.run(main)
File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "training/hparam_search.py", line 684, in main
lr_study.optimize(objective_tf, n_trials=25, callbacks=[new_trial_callback])
File "/home/user/.local/lib/python3.6/site-packages/optuna/study/study.py", line 409, in optimize
show_progress_bar=show_progress_bar,
File "/home/user/.local/lib/python3.6/site-packages/optuna/study/_optimize.py", line 76, in _optimize
progress_bar=progress_bar,
File "/home/user/.local/lib/python3.6/site-packages/optuna/study/_optimize.py", line 163, in _optimize_sequential
trial = _run_trial(study, func, catch)
File "/home/user/.local/lib/python3.6/site-packages/optuna/study/_optimize.py", line 264, in _run_trial
raise func_err
File "/home/user/.local/lib/python3.6/site-packages/optuna/study/_optimize.py", line 213, in _run_trial
value_or_values = func(trial)
File "training/hparam_search.py", line 671, in objective_tf
return objective(trial)
File "training/hparam_search.py", line 660, in objective
val_loss = hps_train(trial)
File "training/hparam_search.py", line 332, in hps_train
iterator, optimizer, dropout_rates
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 317, in get_tower_results
avg_loss, non_finite_files = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, reuse=i > 0)
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 244, in calculate_mean_edit_distance_and_loss
logits, _ = create_model(batch_x, batch_seq_len, dropout, reuse=reuse, rnn_impl=rnn_impl)
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 195, in create_model
output, output_state = rnn_impl(layer_3, seq_length, previous_state, reuse)
File "/home/user/DeepSpeech/training/deepspeech_training/train.py", line 133, in rnn_impl_cudnn_rnn
sequence_lengths=seq_length)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/base.py", line 548, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py", line 237, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in converted code:
relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core:
contrib/cudnn_rnn/python/layers/cudnn_rnn.py:440 call
training)
contrib/cudnn_rnn/python/layers/cudnn_rnn.py:518 _forward
seed=self._seed)
contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:1132 _cudnn_rnn
outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(**args)
python/ops/gen_cudnn_rnn_ops.py:2051 cudnn_rnnv3
time_major=time_major, name=name)
python/framework/op_def_library.py:367 _apply_op_helper
g = ops._get_graph_from_inputs(_Flatten(keywords.values()))
python/framework/ops.py:5979 _get_graph_from_inputs
_assert_same_graph(original_graph_element, graph_element)
python/framework/ops.py:5914 _assert_same_graph
(item, original_item))
ValueError: Tensor("cudnn_lstm/opaque_kernel:0", dtype=float32_ref, device=/device:GPU:0) must be from the same graph as Tensor("tower_0/Reshape_2:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:0).
It looks like the ValueError is complaining that some tensor is not from the same graph as another. But I don't understand how this can be, since I start each run within a new Graph context, so every tensor should be associated with this new graph.
Optuna version is 2.9.1 and Tensorflow version is 1.15.4
I'd be grateful for any insights into where I'm going wrong here, or even if this is the recommended way to use Optuna. Thanks very much!

Why TensorFlow throws this exception when loading a model that was normalized like this?

All latest versions from the very moment of this post.
tensorflow-gpu: 2.6.0
Python: 3.9.7
CUDA: 11.4.2
cuDNN: 8.2.4
As in the code below, when loading a model that was normalized by not passing arguments to Normalization() it throws an exception when that model is loaded by load_model(), however before loading the model I can use it without any apparent issues which makes you think it's all good since Normalization() did NOT complain and took care of the input shape. When loading a model that was normalized by Normalization(input_dim=5) it does NOT thrown any exception since a known shape is specified. That is weird I mean it should warn you that when normalizing it without passing arguments to Normalization() you should expect an exception when loading it.
I'm not sure if it's a bug so I'm posting it here before reporting a bug in the github section, maybe I'm missing to setup something.
Here's my code:
import numpy as np
import tensorflow as tf
def main():
train_data = np.array([[1, 2, 3, 4, 5]])
train_label = np.array([123])
# Uncomment this to load the model and comment the next model and normalizer related lines.
#model = tf.keras.models.load_model('AI/test.h5')
normalizer = tf.keras.layers.experimental.preprocessing.Normalization()
normalizer.adapt(train_data)
model = tf.keras.Sequential([normalizer, tf.keras.layers.Dense(units=1)])
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.1), loss='mean_absolute_error')
model.fit(train_data, train_label, epochs=3000)
model.save('AI/test.h5')
unseen_data = np.array([[1, 2, 3, 4, 6]])
prediction = model.predict(unseen_data)
print(prediction)
if __name__ == "__main__":
main()
It throws the following exception:
Traceback (most recent call last):
File "E:\Backup\Desktop\tensorflow_test.py", line 30, in <module>
main()
File "E:\Backup\Desktop\tensorflow_test.py", line 11, in main
model = tf.keras.models.load_model('AI/test.h5')
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\saving\save.py", line 200, in load_model
return hdf5_format.load_model_from_hdf5(filepath, custom_objects,
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\saving\hdf5_format.py", line 180, in load_model_from_hdf5
model = model_config_lib.model_from_config(model_config,
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\saving\model_config.py", line 52, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\layers\serialization.py", line 208, in deserialize
return generic_utils.deserialize_keras_object(
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\utils\generic_utils.py", line 674, in deserialize_keras_object
deserialized_obj = cls.from_config(
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\sequential.py", line 434, in from_config
model.add(layer)
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\tensorflow\python\training\tracking\base.py", line 530, in _method_wrapper
result = method(self, *args, **kwargs)
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\sequential.py", line 217, in add
output_tensor = layer(self.outputs[0])
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\base_layer.py", line 976, in __call__
return self._functional_construction_call(inputs, args, kwargs,
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\base_layer.py", line 1114, in _functional_construction_call
outputs = self._keras_tensor_symbolic_call(
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\base_layer.py", line 848, in _keras_tensor_symbolic_call
return self._infer_output_signature(inputs, args, kwargs, input_masks)
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\base_layer.py", line 886, in _infer_output_signature
self._maybe_build(inputs)
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\base_layer.py", line 2659, in _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
File "C:\Users\censored\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\layers\preprocessing\normalization.py", line 145, in build
raise ValueError(
ValueError: All `axis` values to be kept must have known shape. Got axis: (-1,), input shape: [None, None], with unknown axis at index: 1
Process finished with exit code 1
It looks like a bug.
Follow this link
if 'input_dim' in kwargs and 'input_shape' not in kwargs:
# Backwards compatibility: alias 'input_dim' to 'input_shape'.
kwargs['input_shape'] = (kwargs['input_dim'],)
if 'input_shape' in kwargs or 'batch_input_shape' in kwargs:
# In this case we will later create an input layer
# to insert before the current layer
if 'batch_input_shape' in kwargs:
batch_input_shape = tuple(kwargs['batch_input_shape'])
elif 'input_shape' in kwargs:
if 'batch_size' in kwargs:
batch_size = kwargs['batch_size']
else:
batch_size = None
batch_input_shape = (batch_size,) + tuple(kwargs['input_shape'])
self._batch_input_shape = batch_input_shape
The error occurs because the normalization could not get any shape information which would lead to self._input_batch_shape =(None, None).
But when loading model(deserialization), It would call build function which should have known shape in all axes.
# Sorted to avoid transposing axes.
self._keep_axis = sorted([d if d >= 0 else d + ndim for d in self.axis])
# All axes to be kept should have known shape.
for d in self._keep_axis:
if input_shape[d] is None:
raise ValueError(
'All `axis` values to be kept must have known shape. Got axis: {}, '
'input shape: {}, with unknown axis at index: {}'.format(
self.axis, input_shape, d))

Incompatible shapes while using triplet loss and pre-trained resnet

I am trying to use pre-trained resnet and fine-tune it using triplet loss. The following code I came up with is a combination of tutorials I found on the topic:
import pathlib
import tensorflow as tf
import tensorflow_addons as tfa
with tf.device('/cpu:0'):
INPUT_SHAPE = (32, 32, 3)
BATCH_SIZE = 16
data_dir = pathlib.Path('/home/user/dataset/')
base_model = tf.keras.applications.ResNet50V2(
weights='imagenet',
pooling='avg',
include_top=False,
input_shape=INPUT_SHAPE,
)
# following two lines are added after edit, originally it was model = base_model
head_model = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))(base_model.output)
model = tf.keras.Model(inputs=base_model.input, outputs=head_model)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
rotation_range=10,
zoom_range=0.1,
)
generator = datagen.flow_from_directory(
data_dir,
target_size=INPUT_SHAPE[:2],
batch_size=BATCH_SIZE,
seed=42,
)
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tfa.losses.TripletSemiHardLoss(),
)
model.fit(
generator,
epochs=5,
)
Unfortunately after running the code I get the following error:
Found 4857 images belonging to 83 classes.
Epoch 1/5
Traceback (most recent call last):
File "ReID/external_process.py", line 35, in <module>
model.fit(
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
tmp_logs = train_function(iterator)
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
result = self._call(*args, **kwds)
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 840, in _call
return self._stateless_fn(*args, **kwds)
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2829, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 1843, in _filtered_call
return self._call_flat(
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 1923, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 545, in call
outputs = execute.execute(
File "/home/user/videolytics/venv_python/lib/python3.8/site-packages/tensorflow/python/eager/execute.py", line 59, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 1328 values, but the requested shape has 16
[[{{node TripletSemiHardLoss/PartitionedCall/Reshape}}]] [Op:__inference_train_function_13749]
Function call stack:
train_function
2020-10-23 22:07:09.094736: W tensorflow/core/kernels/data/generator_dataset_op.cc:103] Error occurred when finalizing GeneratorDataset iterator: Failed precondition: Python interpreter state is not initialized. The process may be terminated.
[[{{node PyFunc}}]]
The dataset directory has 83 subdirectories, one per class and each of this subdirectories contains images of given class. The dimension 1328 in the error output is the batch size (16) times number of classes (83), and the dimension 16 is the batch size (both dimensions change accordingly if I change the BATCH_SIZE.
To be honest I do not really understand the error, so any solution or even any kind of indight where is the problem is deeply appreciated.
The problem is that the TripletSemiHardLoss expects
labels y_true to be provided as 1-D integer Tensor with shape [batch_size] of multi-class integer labels
but the flow_from_directory by default generate categorical labels; using class_mode="sparse" should fix the problem.

tflite converter from a custom keras layer

I'm getting a TypeError when trying to convert a keras .h5 file to tflite.
The new layer is a gaussian kernel (Radial Basis Layer).
To be able to save and load the keras model I defined also the get_config() method in the custom layer. So I'm able to save and load the model correctly.
class RBFLayer(Layer):
def __init__(self, output_dim, centers=None, tol = 1e-6, gamma=0, **kwargs):
super(RBFLayer, self).__init__(**kwargs)
self.centers_ = centers
self.output_dim= output_dim
self.gamma_ = gamma
self.tol_ = tol
def build(self, input_shape):
self.mu = K.variable(self.centers_, name='centers')
self.gamma = K.variable(self.gamma_, name='gamma')
self.tol = K.constant(self.tol_,name='tol')
super(RBFLayer, self).build(input_shape)
def call(self, inputs): #Kernel radial
a,b = self.mu.shape
diff = K.reshape( K.tile(inputs,(1,a))-K.reshape(self.mu,(1,-1)), (-1,a,b))
l2 = K.sum(K.pow(diff, 2), axis=-1)
res = K.exp(-1 * self.gamma * l2)
mask = K.greater( res, self.tol)
return K.switch(mask, res, K.zeros_like(res))
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
def get_config(self):
config = {
'output_dim': self.output_dim,
'centers': self.centers_,
'gamma': self.gamma_,
'tol': self.tol_
}
base_config = super(RBFLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
Now I want to save the model to tflite. I use TFLiteConverter from keras file including also 'custom_objects'.
def save_tflite(self, base_name):
file =base_name +'.h5'
converter = tf.lite.TFLiteConverter.from_keras_model_file(file, custom_objects={'RBFLayer':RBFLayer})
tflite_model = converter.convert()
open(base_name+".tflite", "wb").write(tflite_model)
I expect to get the tflite model file including the K.variables used while training the complete model (centers, tol, gamma).
When converting I get these error messages:
airgorbn.save_tflite( base_name)
Traceback (most recent call last):
File "<ipython-input-7-cdaa1ec46233>", line 1, in <module>
airgorbn.save_tflite( base_name)
File "C:/Users/AIRFI/Hospital/keras_RadialBasis.py", line 158, in save_tflite
converter = tf.lite.TFLiteConverter.from_keras_model_file(file, custom_objects={'RBFLayer':RBFLayer})
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\lite\python\lite.py", line 747, in from_keras_model_file
keras_model = _keras.models.load_model(model_file, custom_objects)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\saving\save.py", line 146, in load_model
return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\saving\hdf5_format.py", line 212, in load_model_from_hdf5
custom_objects=custom_objects)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\saving\model_config.py", line 55, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\layers\serialization.py", line 89, in deserialize
printable_module_name='layer')
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\utils\generic_utils.py", line 192, in deserialize_keras_object
list(custom_objects.items())))
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 353, in from_config
model.add(layer)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\training\tracking\base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 154, in add
'Found: ' + str(layer))
TypeError: The added layer must be an instance of class Layer. Found: <__main__.RBFLayer object at 0x0000017D3A75AC50>
you need to define that layer as a custom op.
refer to this https://www.tensorflow.org/lite/guide/ops_custom

String input to categorical column via dataset

I'm trying to learn how to use the Estimator API, using input_fn to provide Dataset backed input to a feature_column generated input layer.
My code looks like
import tensorflow as tf import random
tf.logging.set_verbosity(tf.logging.DEBUG)
def input_fn():
def gen():
for i in range(100000):
for j in range(10):
yield {"in": str(j)}, [str(j+1)]
data = tf.data.Dataset.from_generator(gen, ({"in": tf.string}, tf.string))
data = data.batch(10)
iterator = data.make_one_shot_iterator()
return iterator.get_next()
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="in",
vocabulary_list=map(lambda i: str(i), range(11)))
embedding_column = tf.feature_column.embedding_column(
categorical_column=vocabulary_feature_column,
dimension=2)
with tf.Session() as sess:
print(sess.run(input_fn()))
classifier = tf.estimator.DNNClassifier(
feature_columns = [embedding_column],
hidden_units = [5,5],
n_classes = 11,
model_dir = '/tmp/predict/snap')
classifier.train(
input_fn=input_fn)
but running it I get
Traceback (most recent call last):
File "predict.py", line 33, in
input_fn=input_fn)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 711, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 694, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 334, in _model_fn
config=config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 190, in _dnn_model_fn
logits = logit_fn(features=features, mode=mode)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 89, in dnn_logit_fn
features=features, feature_columns=feature_columns)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 230, in input_layer
trainable=trainable)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1837, in _get_dense_tensor
inputs, weight_collections=weight_collections, trainable=trainable)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2123, in _get_sparse_tensors
return _CategoricalColumn.IdWeightPair(inputs.get(self), None)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1533, in get
transformed = column._transform_feature(self) # pylint: disable=protected-access
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2091, in _transform_feature
input_tensor = _to_sparse_input(inputs.get(self.key))
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1631, in _to_sparse_input
raise ValueError('Undefined input_tensor shape.')
ValueError: Undefined input_tensor shape.
Looking at the tf sources I get the impression I that the categorical_column_with_vocabulary_list expects a tensor as output instead of a string, but I have a hard time understanding how to make my input_fn provide that the right way.
Does anyone have any idea what I'm doing wrong here?
As a comparison, the following code works perfectly fine: https://pastebin.com/28QUNAjA
EDIT
I noticed that replacing tf.data.Dataset.from_generator with tf.data.Dataset.from_tensor_slices makes the code run.
I.e. the following actually works:
import tensorflow as tf
import random
tf.logging.set_verbosity(tf.logging.DEBUG)
def input_fn():
data = tf.data.Dataset.from_tensor_slices(({"in": map(lambda i: str(i), range(10))}, range(1,11)))
data = data.repeat(1000)
data = data.batch(10)
iterator = data.make_one_shot_iterator()
return iterator.get_next()
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="in",
vocabulary_list=map(lambda i: str(i), range(11)))
embedding_column = tf.feature_column.embedding_column(
categorical_column=vocabulary_feature_column,
dimension=2)
with tf.Session() as sess:
print(sess.run(input_fn()))
classifier = tf.estimator.DNNClassifier(
feature_columns = [embedding_column],
hidden_units = [5,5],
n_classes = 11,
model_dir = '/usr/local/google/home/zond/tmp/predict/snap')
classifier.train(
input_fn=input_fn)
This ought to be a bug, so I created https://github.com/tensorflow/tensorflow/issues/15178.