I am trying an LSTM encoder-decoder network and get an invalid argument error. I have just started working with so I don't have much experience.
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
X, y = list(), list()
for i in range(len(sequences)):
# find the end of this pattern
end_ix = i + n_steps_in
out_end_ix = end_ix + n_steps_out-1
# check if we are beyond the dataset
if out_end_ix > len(sequences):
break
# gather input and output parts of the pattern
seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1:out_end_ix, -1]
X.append(seq_x)
y.append(seq_y)
return np.array(X), np.array(y)
#Devide Train and Test Set
train_X,train_y = split_sequences(train ,24,12)
test_X , test_y = split_sequences(test, 24, 12)
print(train_X.shape)
print(train_y.shape)
print(test_X)
# design network
model = Sequential()
model.add(LSTM(100, activation='tanh', input_shape=(n_timesteps, n_features)))
model.add(RepeatVector(n_outputs))
model.add(LSTM(100, activation='tanh',return_sequences=True))
model.add(TimeDistributed(Dense (100 ,activation = 'tanh')))
model.add(TimeDistributed(Dense(12)))
model.compile(optimizer='adam', loss='mse',metrics = ['mape', 'mae', 'mse'])
plot_model(model=model, show_shapes=True)
# fit network
history = model.fit(train_X, train_y, epochs=70, batch_size=16, validation_data=(test_X, test_y), verbose=0, shuffle=False)
# plot history
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()
And I keep getting this error which I have no idea what to do about.
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'sub_1' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 452, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 481, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 431, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-104-5b6253fe4137>", line 2, in <module>
history = model.fit(train_X, train_y, epochs=70, batch_size=16, validation_data=(test_X, test_y), verbose=0, shuffle=False)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 864, in train_step
return self.compute_metrics(x, y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 957, in compute_metrics
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1457, in mean_absolute_error
return backend.mean(tf.abs(y_pred - y_true), axis=-1) Node: 'sub_1' required broadcastable shapes [[{{node sub_1}}]] [Op:__inference_train_function_764649]
Related
I am trying to train a neural network for the final code of Udemy's course on The Complete Self-Driving Car Course - Applied Deep Learning. This is the batch_generator
function:
def batch_generator(image_paths, steering_ang, batch_size, istraining):
while True:
batch_img = []
batch_steering = []
for i in range(batch_size):
random_index = random.randint(0, len(image_paths) -1)
if istraining:
im, steering = random_augment(image_paths[random_index], steering_ang[random_index])
else:
im = mpimg.imread(image_paths[random_index])
steering = steering_ang[random_index]
img_preprocess(im)
batch_img.append(im)
batch_steering.append(steering)
yield (np.asarray(batch_img),np.asarray(batch_steering))
, with the nvidia_model defined as:
def nvidia_model():
model = Sequential()
model.add(Convolution2D(24,(5,5),strides=(2,2),input_shape=(66,200,3),activation='elu'))
model.add(Convolution2D(36,(5,5),strides=(2,2),activation='elu'))
model.add(Convolution2D(48,(5,5),strides=(2,2),activation='elu'))
model. Add(Convolution2D(64,(3,3),activation='elu'))
model.add(Convolution2D(64,(3,3),activation='elu'))
#model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(100,activation='elu'))
#model.add(Dropout(0.5))
model.add(Dense(50,activation='elu'))
#model.add(Dropout(0.5))
model. Add(Dense(10,activation='elu'))
#model.add(Dropout(0.5))
model.add(Dense(1))
optimizer = Adam(learning_rate=1e-3)
model.compile(loss='mse',optimizer=optimizer)
return model
When I train the model,
history = model. Fit(batch_generator(X_train, y_train, 100, 1), steps_per_epoch=300, epochs = 10, validation_data = batch_generator(X_valid, y_valid, 100, 0), validation_steps=200, verbose=1, shuffle=1)
I get the following error:
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-28-fd22c359b3f3> in <module>
----> 1 history = model.fit(batch_generator(X_train, y_train, 100, 1), steps_per_epoch=200, epochs = 10, validation_data = batch_generator(X_valid, y_valid, 100, 0), validation_steps=200, verbose=1, shuffle=1)
1 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
InvalidArgumentError: Graph execution error:
Detected at node 'sequential/flatten/Reshape' defined at (most recent call last):
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.8/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.8/dist-packages/traitlets/config/application.py", line 992, in launch_instance
app.start()
File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelapp.py", line 612, in start
self.io_loop.start()
File "/usr/local/lib/python3.8/dist-packages/tornado/platform/asyncio.py", line 149, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
self._run_once()
File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
handle._run()
File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 690, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 743, in _run_callback
ret = callback()
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 787, in inner
self.run()
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 748, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 381, in dispatch_queue
yield self.process_one()
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 225, in wrapper
runner = Runner(result, future, yielded)
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 714, in __init__
self.run()
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 748, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/kernelbase.py", line 543, in execute_request
self.do_execute(
File "/usr/local/lib/python3.8/dist-packages/tornado/gen.py", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/ipkernel.py", line 306, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2854, in run_cell
result = self._run_cell(
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2881, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3057, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3249, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-28-fd22c359b3f3>", line 1, in <module>
history = model.fit(batch_generator(X_train, y_train, 100, 1), steps_per_epoch=200, epochs = 10, validation_data = batch_generator(X_valid, y_valid, 100, 0), validation_steps=200, verbose=1, shuffle=1)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1409, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1051, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1040, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1030, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 889, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/sequential.py", line 374, in call
return super(Sequential, self).call(inputs, training=training, mask=mask)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py", line 458, in call
return self._run_internal_graph(
File "/usr/local/lib/python3.8/dist-packages/keras/engine/functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/keras/layers/reshaping/flatten.py", line 98, in call
return tf.reshape(inputs, flattened_shape)
Node: 'sequential/flatten/Reshape'
Input to reshape is a tensor with 2745600 values, but the requested shape requires a multiple of 1152
[[{{node sequential/flatten/Reshape}}]] [Op:__inference_train_function_1186]
I would appreciate any help in resolving this.
I have tried changing the input_shape and batch_size to 1152 for training but that did not help.
I'm new to the cnn algorithm. I had a code that could normally work with a single layer. I tried to add layers but I keep getting the same error. I have 218 classes in total and I have 5200 photos. Can someone help?
IMAGE_SHAPE = (224, 224)
TRAINING_DATA_DIR = str(data_root)
print(TRAINING_DATA_DIR);
datagen_kwargs = dict(rescale=1.0/255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
validation_split=.20)
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,validation_split=.20)
valid_generator = valid_datagen.flow_from_directory(TRAINING_DATA_DIR,subset="validation",shuffle=True,target_size=IMAGE_SHAPE)
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**datagen_kwargs)
train_generator = train_datagen.flow_from_directory(TRAINING_DATA_DIR,subset="training",shuffle=True,target_size=IMAGE_SHAPE)
image_batch_train, label_batch_train = next(iter(train_generator))
dataset_labels = sorted(train_generator.class_indices.items(), key=lambda pair:pair[1])
dataset_labels = np.array([key.title() for key, value in dataset_labels])
model=Sequential()
input_shape=(224,224,3)
model.add(Conv2D(224,kernel_size=(3,3),input_shape=input_shape))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(128,activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(218, activation='softmax'))
model.compile(
optimizer="adam",
loss='sparse_categorical_crossentropy',
metrics=['acc'])
model.summary()
steps_per_epoch = np.ceil(train_generator.samples/train_generator.batch_size)
val_steps_per_epoch = np.ceil(valid_generator.samples/valid_generator.batch_size)
hist = model.fit(
train_generator,
epochs=25,
verbose=1,
steps_per_epoch=steps_per_epoch,
validation_data=valid_generator,
validation_steps=val_steps_per_epoch).history
Please can people who can help write details or write the problem through my code? Sometimes I don't quite understand. I would be grateful if you tell me where I need to fix the code above.
InvalidArgumentError: Graph execution error:
Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 612, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 149, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/ioloop.py", line 690, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "/usr/local/lib/python3.7/dist-packages/tornado/ioloop.py", line 743, in _run_callback
ret = callback()
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 787, in inner
self.run()
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 748, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 545, in execute_request
user_expressions, allow_stdin,
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 306, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2855, in run_cell
raw_cell, store_history, silent, shell_futures)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3058, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3249, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-19-c910ff4c6e34>", line 9, in <module>
validation_steps=val_steps_per_epoch).history
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1409, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1051, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1040, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1030, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 949, in compute_loss
y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 139, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 243, in call
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1861, in sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/keras/backend.py", line 5239, in sparse_categorical_crossentropy
labels=target, logits=output)
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [32,218] and labels shape [6976]
[[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_818]
this is exactly the error
I'm working on a ML project using Google Colab and Tensorflow to train a CNN, starting from the EfficientNetV2M model.
It used to work just fine until two days ago, when starting the training:
train = model.fit(X, y, epochs=save_every_n_epochs, batch_size=16, verbose=1)
gave the following error:
UnimplementedError Traceback (most recent call last)
<ipython-input-5-1b2fb9765100> in <module>
70 print(f"Training the model for {save_every_n_epochs} epochs")
71
---> 72 train = model.fit(X, y, epochs=save_every_n_epochs, batch_size=16, verbose=1)
73 print("Model trained")
74
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
UnimplementedError: Graph execution error:
Detected at node 'sequential/efficientnetv2-m/stem_conv/Conv2D' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 612, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/ioloop.py", line 758, in _run_callback
ret = callback()
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 1233, in inner
self.run()
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 1147, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 326, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 326, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 545, in execute_request
user_expressions, allow_stdin,
File "/usr/local/lib/python3.7/dist-packages/tornado/gen.py", line 326, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 306, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2855, in run_cell
raw_cell, store_history, silent, shell_futures)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3058, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3249, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-1b2fb9765100>", line 72, in <module>
train = model.fit(X, y, epochs=save_every_n_epochs, batch_size=16, verbose=1)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1409, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1051, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1040, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1030, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 889, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/sequential.py", line 374, in call
return super(Sequential, self).call(inputs, training=training, mask=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 459, in call
inputs, training=training, mask=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 459, in call
inputs, training=training, mask=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/convolutional/base_conv.py", line 250, in call
outputs = self.convolution_op(inputs, self.kernel)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/convolutional/base_conv.py", line 232, in convolution_op
name=self.__class__.__name__)
Node: 'sequential/efficientnetv2-m/stem_conv/Conv2D'
DNN library is not found.
[[{{node sequential/efficientnetv2-m/stem_conv/Conv2D}}]] [Op:__inference_train_function_45723]
I wasn't able to train any model since then, always getting this error, also when loading previously stored models.
I am able to train when i use an environment without GPU, but it is obviously too slow.
I've also tried to change the TF version as suggested in other topics, without any success.
Any suggestions?
Yes there's a similar question from a few hours ago. Apparently this is a problem related to the latest Tensoflow update introduced in Colab (Tensorflow 2.9.1).
As a quick fix you could downgrade Tensorflow. However only downgrading to tf 2.8, as suggested in the linked question wasn't enough to fix the problem in my case.
Try this:
!pip uninstall tensorflow-gpu
!pip install tensorflow-gpu==2.8
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2
Also make sure to restart the runtime if it asks you to do so.
I get the "Graph execution error" trying to use U-net for segmentation in colab, it is just fine if I don't use GPU in runtime..
I saw a similar issue here, but I couldn't figure out how that can help me with my code:
enter link description here
!wget https://documents.epfl.ch/groups/c/cv/cvlab-unit/www/data/%20ElectronMicroscopy_Hippocampus/training.tif
!wget https://documents.epfl.ch/groups/c/cv/cvlab-unit/www/data/%20ElectronMicroscopy_Hippocampus/training_groundtruth.tif
!pip install patchify
!pip install tensorflow-gpu
# !pip install tensorflow
!pip install keras
!pip install segmentation-models
!pip install multipagetiff
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
from patchify import patchify
from PIL import Image
import segmentation_models as sm
import multipagetiff as mtif
from matplotlib import pyplot as plt
import glob
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow.keras as keras
img = mtif.read_stack("/content/training.tif", units='um')
msk = mtif.read_stack("/content/training_groundtruth.tif", units='um')
scaler = MinMaxScaler()
patch_size = 64 # the size of patches
image_dataset = [] # preparing dataset(images)
for k in range(0, 1):
backtorgb = cv2.cvtColor(img[k,:,:],cv2.COLOR_GRAY2RGB) #converting image data to RGB(3channels)
patches_img = patchify(backtorgb, (patch_size, patch_size, 3), step=patch_size) # extracing patches with no overlap
for i in range(patches_img.shape[0]):
for j in range(patches_img.shape[1]):
single_patch_img = patches_img[i,j,:,:]
single_patch_img = scaler.fit_transform(single_patch_img.reshape(-1, single_patch_img.shape[-1])).reshape(single_patch_img.shape)
image_dataset.append(single_patch_img)
image_dataset = np.array(image_dataset)
image_dataset = image_dataset[:,0,:,:,:]
mask_dataset = [] # preparing dataset(masks)
for k in range(0, 1):
patches_mask = patchify(msk[k,:,:], (patch_size, patch_size), step=patch_size) # extracing patches with no overlap
for i in range(patches_mask.shape[0]):
for j in range(patches_mask.shape[1]):
single_patch_mask = patches_mask[i,j,:,:]
mask_dataset.append(single_patch_mask)
'''#Normalize images ''' # these data are already normalized but this step is crucial
#D not normalize masks, just rescale to 0 to 1.
mask_dataset = np.expand_dims((np.array(mask_dataset)),3) /255.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(image_dataset, mask_dataset, test_size = 0.10, random_state = 0)
activation='softmax'
n_classes=1
LR = 0.0001
optim = keras.optimizers.Adam(LR)
# Segmentation models losses can be combined together by '+' and scaled by integer or float factor
# set class weights for dice_loss (car: 1.; pedestrian: 2.; background: 0.5;)
dice_loss = sm.losses.DiceLoss(class_weights=np.array([0.25, 0.25, 0.25]))
focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)
metrics = [sm.metrics.IOUScore(threshold=0.5), sm.metrics.FScore(threshold=0.5)]
###Model 1
BACKBONE1 = 'resnet34'
preprocess_input1 = sm.get_preprocessing(BACKBONE1)
# preprocessing input
X_train1 = preprocess_input1(X_train)
X_test1 = preprocess_input1(X_test)
# define model
model1 = sm.Unet(BACKBONE1, encoder_weights='imagenet', classes=n_classes, activation=activation)
model1.compile(optim, total_loss, metrics=metrics)
# print(model1.summary())
history1=model1.fit(X_train1,
y_train,
batch_size=8,
epochs=5,
verbose=10,
validation_data=(X_test1, y_test))
model_standard.save('mitochondria_standard_Unet.hdf5')
and this is the error I get, I tried different input size and also defined Unet in a very classic fashion.. but they weren't the case ...
Epoch 1/5
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-7-b10395c761fe> in <module>()
28 epochs=5,
29 verbose=10,
---> 30 validation_data=(X_test1, y_test))
31 model_standard.save('mitochondria_standard_Unet.hdf5')
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InternalError: Graph execution error:
Detected at node 'model_3/bn_data/FusedBatchNormV3' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-7-b10395c761fe>", line 30, in <module>
validation_data=(X_test1, y_test))
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1409, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1051, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1040, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1030, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 889, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 459, in call
inputs, training=training, mask=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py", line 750, in call
outputs = self._fused_batch_norm(inputs, training=training)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py", line 595, in _fused_batch_norm
training, _fused_batch_norm_training, _fused_batch_norm_inference)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/control_flow_util.py", line 106, in smart_cond
pred, true_fn=true_fn, false_fn=false_fn, name=name)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py", line 581, in _fused_batch_norm_training
exponential_avg_factor=exponential_avg_factor)
Node: 'model_3/bn_data/FusedBatchNormV3'
cuDNN launch failure : input shape ([8,3,256,256])
[[{{node model_3/bn_data/FusedBatchNormV3}}]] [Op:__inference_train_function_14008]
I got this error at the start of the training, what does it mean? there is no clear indication of what causes the error/the source of the problem ..............................................................................................................................................................
Epoch 1/10
Traceback (most recent call last):
File "train.py", line 199, in <module>
app.run(main)
File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 299, in run
_run_main(main, args)
File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 250, in _run_main
sys.exit(main(argv))
File "train.py", line 194, in main
validation_data=val_dataset)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 71, in _method_wrapper
return method(self, *args, **kwargs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 920, in fit
tmp_logs = train_function(iterator)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 608, in __call__
result = self._call(*args, **kwds)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 655, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 535, in _initialize
*args, **kwds))
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2447, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2775, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2665, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 981, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 446, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 968, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:630 train_function *
return step_function(self, iterator)
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:620 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/root/.local/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:952 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/root/.local/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2292 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/root/.local/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2651 _call_for_each_replica
return fn(*args, **kwargs)
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:613 run_step **
outputs = model.train_step(data)
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:573 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:145 __call__
losses = self.call(y_true, y_pred)
/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:248 call
return self.fn(y_true, y_pred, **self._fn_kwargs)
/content/drive/My Drive/yolov3-tf2/yolov3_tf2/models.py:264 yolo_loss
y_pred, anchors, classes)
/content/drive/My Drive/yolov3-tf2/yolov3_tf2/models.py:155 yolo_boxes
pred, (2, 2, 1, classes), axis=-1)
/root/.local/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py:1978 split
value=value, size_splits=size_splits, axis=axis, num_split=num, name=name)
/root/.local/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py:9881 split_v
num_split=num_split, name=name)
/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
attrs=attr_protos, op_def=op_def)
/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py:595 _create_op_internal
compute_device)
/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:3470 _create_op_internal
op_def=op_def)
/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:1960 __init__
control_input_ops, op_def)
/root/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:1800 _create_c_op
raise ValueError(str(e))
ValueError: can't split axis of size 85 into pieces of size [2,2,1,20] for '{{node yolo_loss/split}} = SplitV[T=DT_FLOAT, Tlen=DT_INT32, num_split=4](model/layer_207_output_0/layer_206_lambda/Reshape, yolo_loss/Const, yolo_loss/split/split_dim)' with input shapes: [?,?,?,3,85], [4], [] and with computed input tensors: input[1] = <2 2 1 20>, input[2] = <-1>
this is due to not having same no of nodes: Here 85 cannot be convert or resize into 2*2*1*20 which is equal to 80
you can reshape or resize only node value are equal
In you case 85 is not equal to 2*2*1*20(80)