ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible - numpy

I'm having a problem with the shapes of my input to Keras/Tensorflow.
EDIT - I found that when I just specify 'accuracy' for my metrics it works fine and I can train my model, however when I do it the other way by adding precision, recall it fails with the error.
My Model summary is like this
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 128, 64) 2251520
_________________________________________________________________
bidirectional (Bidirectional (None, 128, 128) 66048
_________________________________________________________________
time_distributed (TimeDistri (None, 128, 18) 2322
=================================================================
Total params: 2,319,890
Trainable params: 2,319,890
Non-trainable params: 0
I'm doing NER and padded my sentences to 128 words.
My Code is as follows and dataset is from here
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
physical_devices = tf.config.list_physical_devices()
tf.config.experimental.set_memory_growth(physical_devices[1], enable=True)
df = pd.read_csv("ner_dataset.csv", encoding="latin1")
# fill NaN - propogate non null values forward
df = df.fillna(method="ffill")
sent_count = len(set(df["Sentence #"].values))
print(sent_count)
words = list(set(df["Word"].values))
words_count = len(words)
print(words_count)
word2idx = {}
# add the padding and unknown token
word2idx["PAD_TOKEN"] = len(word2idx)
word2idx["UNK_TOKEN"] = len(word2idx)
# add the rest
for i in range(0, len(words)):
word2idx[words[i]] = len(word2idx)
# index to word mapping
idx2word = {i: w for w, i in word2idx.items()}
# number of unique tags
tags = list(set(df["Tag"].values))
tags_count = len(tags)
print(tags_count)
tag2idx = {}
tag2idx['PAD_TAG'] = 0 # this is the tag that is assigned to the pad tokens 'PAD_TOKEN'
for i in range(0, len(tags)):
tag2idx[tags[i]] = len(tag2idx)
# index to tag mapping
idx2tag = {i: w for w, i in tag2idx.items()}
def getSentences(dataframe):
sentences = []
groups = dataframe.groupby("Sentence #")
for name, group in groups:
zipped = zip(group["Word"], group["Tag"])
sentences.append(list(zipped))
return sentences
sents = getSentences(df)
len(sents[0]), len(sents[1]) # sentences are of different lengths
max_len = 128
y = [[tag2idx[word[1]] for word in s] for s in sents]
y = pad_sequences(maxlen=max_len, sequences=y, value=tag2idx["PAD_TAG"], padding='post', truncating='post')
x = [[word2idx[word[0]] for word in s] for s in sents]
x = pad_sequences(maxlen=max_len, sequences=x, value=word2idx["PAD_TOKEN"], padding='post', truncating='post')
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
vocab_size = len(word2idx)
vocab_size
batch_size = 32
model = models.Sequential()
embeddinglayer = layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len)
model.add(embeddinglayer)
bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))
model.add(bilstm)
num_tags = len(tag2idx)
timedist = layers.TimeDistributed(layers.Dense(num_tags, activation="softmax"))
model.add(timedist)
model.summary()
METRICS = [
'accuracy',
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall')
]
model.compile(optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=METRICS)
history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
The types after splitting are all np array
type(x_train), type(x_test), type(y_train), type(y_test)
(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
And the shapes are
((38367, 128), (9592, 128), (38367, 128), (9592, 128))
Each training item (sentence) is an array of word index of length 128, for example the x_train[0] looks like the array below (the y values are similar (same length 128) except they are index of the tags/labels for each word.)
array([25653, 1878, 26510, 12653, 33524, 15752, 30488, 14594, 33943,
3656, 22478, 596, 13235, 10080, 16432, 18190, 20273, 10254,
34463, 15526, 24899, 4359, 30488, 10525, 19165, 30439, 16205,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)
EDIT: Error Below
Epoch 1/25
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-35a0fcfc79ab> in <module>
111
112
--> 113 history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
869 # This is the first call of __call__, so we have to initialize.
870 initializers = []
--> 871 self._initialize(args, kwds, add_initializers_to=initializers)
872 finally:
873 # At this point we know that the initialization is complete (or less
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
724 self._concrete_stateful_fn = (
725 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 726 *args, **kwds))
727
728 def invalid_creator_scope(*unused_args, **unused_kwds):
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2967 args, kwargs = None, None
2968 with self._lock:
-> 2969 graph_function, _ = self._maybe_define_function(args, kwargs)
2970 return graph_function
2971
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3359
3360 self._function_cache.missed.add(call_context_key)
-> 3361 graph_function = self._create_graph_function(args, kwargs)
3362 self._function_cache.primary[cache_key] = graph_function
3363
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3204 arg_names=arg_names,
3205 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3206 capture_by_value=self._capture_by_value),
3207 self._function_attributes,
3208 function_spec=self.function_spec,
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
988 _, original_func = tf_decorator.unwrap(python_func)
989
--> 990 func_outputs = python_func(*func_args, **func_kwargs)
991
992 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
632 xla_context.Exit()
633 else:
--> 634 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
635 return out
636
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function *
return step_function(self, iterator)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step **
outputs = model.train_step(data)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\training.py:758 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:408 update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\utils\metrics_utils.py:90 decorated
update_op = update_state_fn(*args, **kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\metrics.py:177 update_state_fn
return ag_update_state(*args, **kwargs)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\metrics.py:1291 update_state **
sample_weight=sample_weight)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\keras\utils\metrics_utils.py:354 update_confusion_matrix_variables
y_pred.shape.assert_is_compatible_with(y_true.shape)
c:\miniconda3\envs\ner\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible

I think that your x, y arguments to model.fit() are inconsistent: x is a list, y -s a numpy array. Try:
history = model.fit(np_array(x_train), np.array(y_train))

Related

Tensor dimension mismatch when neural network output changed from dense-linear type to dense-1-hot type

I have a deep neural-network in tensorflow that trains fine with a 5-unit dense output layer with linear activation (data labels are linear scaled values similar to linear regression). When I change the output label encoding to 1-hot and the output layer to 5 individual, 25-unit dense 'heads' tensorflow will not train (ie .fit) and complains about the tensor shape being incompatible. It seems like a straight forward issue but the data label shape looks fine to me.
ValueError: Shapes (128, 1, 25) and (128, 128, 25) are incompatible
Details below:
Model's output layer details:
Linear dense output model:
X_in = Input(batch_input_shape=(batch_size=128, 128, 14), name='input')
... many layers...
yhat = Dense(5, activation='linear', name='Lin_classify')(X)
model = Model(inputs=X_in, outputs=[yhat])
model.compile(loss='mse', optimizer=Adam(learning_rate=model_params['learn_rate']), metrics=['accuracy'])
model.summary()
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
FC_1 (TimeDistributed) (128, 128, 128) 32896 BDLSTM_3[0][0]
__________________________________________________________________________________________________
Lin_classify (Dense) (128, 128, 5) 645 FC_1[0][0]
==================================================================================================
1-hot encoded model:
Changed output layer to multi-head.
The output layer will pick 5 objects that can each be 1 of 25 classes
X_in = Input(batch_input_shape=(batch_size=128, 128, 14), name='input')
... many layers...
yhat1 = Dense(25, activation='softmax', name='y_1hot_1')(X)
yhat2 = Dense(25, activation='softmax', name='y_1hot_2')(X)
yhat3 = Dense(25, activation='softmax', name='y_1hot_3')(X)
yhat4 = Dense(25, activation='softmax', name='y_1hot_4')(X)
yhat5 = Dense(25, activation='softmax', name='y_1hot_5')(X)
model = Model(inputs=X_in, outputs=[yhat1, yhat2, yhat3, yhat4, yhat5])
model.compile(loss={'y_1hot_1':loss, 'y_1hot_2':loss, 'y_1hot_3':loss, 'y_1hot_4':loss, 'y_1hot_5':loss},
optimizer=Adam(learning_rate=model_params['learn_rate']),
metrics={'y_1hot_1':metric, 'y_1hot_2':metric,'y_1hot_3':metric, 'y_1hot_4':metric, 'y_1hot_5':metric})
model.summary()
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
FC_1 (TimeDistributed) (128, 128, 128) 32896 BDLSTM_3[0][0]
__________________________________________________________________________________________________
y_1hot_1 (Dense) (128, 128, 25) 3225 FC_1[0][0]
__________________________________________________________________________________________________
y_1hot_2 (Dense) (128, 128, 25) 3225 FC_1[0][0]
__________________________________________________________________________________________________
y_1hot_3 (Dense) (128, 128, 25) 3225 FC_1[0][0]
__________________________________________________________________________________________________
y_1hot_4 (Dense) (128, 128, 25) 3225 FC_1[0][0]
__________________________________________________________________________________________________
y_1hot_5 (Dense) (128, 128, 25) 3225 FC_1[0][0]
==================================================================================================
Both varieties of models compile as expected. Note that the output shapes of both models are similar (128, 128, x) except that the 1-hot model has 5 output heads.
Data:
X_train dims: (num_samples, window_length, num_features)
Linear model: X_train.shape = (3840, 128, 14); Y_train.shape = (3840, 1, 5)
1-hot model: X_train.shape = (3840, 128, 14); Y_train[0:5].shape = (3840, 1, 25)
Linear model:
print(model.inputs)
>[<KerasTensor: shape=(128, 128, 14) dtype=float32 (created by layer 'input')>]
print(model.outputs)
>[<KerasTensor: shape=(128, 128, 5) dtype=float32 (created by layer 'Lin_classify')>]
Note: label shape: (m, 1, 5) Output shape: (128, 128, 5) This trains fine
1-hot model:
print(model.inputs)
>[<KerasTensor: shape=(128, 128, 14) dtype=float32 (created by layer 'input')>]
print(model.outputs)
>[<KerasTensor: shape=(128, 128, 25) dtype=float32 (created by layer 'y_1hot_1')>,
<KerasTensor: shape=(128, 128, 25) dtype=float32 (created by layer 'y_1hot_2')>,
<KerasTensor: shape=(128, 128, 25) dtype=float32 (created by layer 'y_1hot_3')>,
<KerasTensor: shape=(128, 128, 25) dtype=float32 (created by layer 'y_1hot_4')>,
<KerasTensor: shape=(128, 128, 25) dtype=float32 (created by layer 'y_1hot_5')>]
Note: label shape: (m, 1, 25) Output shape: (128, 128, 25) This fails with mismatch error
Training is the same for both models and occurs 1 epoch at a time in stateful mode (stateful applies to hidden lstm layers, not the output layer):
for i in range(n_epochs):
ehist = model.fit(x=X_train, y=Y_train, batch_size=128, epochs=1,
shuffle=False, verbose=2)
model.reset_states()
The model receives batches of 128 examples of 128x14 input data and outputs 128, 1x5 predictions in the case of the linear model. And outputs 5 individual 128, 1x25 predictions in the case of the 1-hot model.
Question:
Why does training the 1-hot model immediately fail with the following error?
ValueError: Shapes (128, 1, 25) and (128, 128, 25) are incompatible
(the linear model does not complain and the main difference between model outputs is the 3rd dimension; ie 5 -> 25).
Any help with this would be greatly appreciated.
Here is the full error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[23], line 7
6 for i in range(n_epochs):
----> 7 ehist = model.fit(x=X_train, y=Y_train, batch_size=batch_sz, epochs=1,
8 shuffle=False, verbose=2)
9 model.reset_states()
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\training.py:1184, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1177 with tf.profiler.experimental.Trace(
1178 'train',
1179 epoch_num=epoch,
1180 step_num=step,
1181 batch_size=batch_size,
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\def_function.py:885, in Function.__call__(self, *args, **kwds)
882 compiler = "xla" if self._jit_compile else "nonXla"
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
887 new_tracing_count = self.experimental_get_tracing_count()
888 without_tracing = (tracing_count == new_tracing_count)
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\def_function.py:933, in Function._call(self, *args, **kwds)
930 try:
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
936 # interestingly an exception was raised) so we no longer need a lock.
937 self._lock.release()
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\def_function.py:759, in Function._initialize(self, args, kwds, add_initializers_to)
756 self._lifted_initializer_graph = lifted_initializer_graph
757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
758 self._concrete_stateful_fn = (
--> 759 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
760 *args, **kwds))
762 def invalid_creator_scope(*unused_args, **unused_kwds):
763 """Disables variable creation."""
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\function.py:3066, in Function._get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3064 args, kwargs = None, None
3065 with self._lock:
-> 3066 graph_function, _ = self._maybe_define_function(args, kwargs)
3067 return graph_function
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\function.py:3463, in Function._maybe_define_function(self, args, kwargs)
3459 return self._define_function_with_shape_relaxation(
3460 args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
3464 self._function_cache.primary[cache_key] = graph_function
3466 return graph_function, filtered_flat_args
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\function.py:3298, in Function._create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3293 missing_arg_names = [
3294 "%s_%d" % (arg, i) for i, arg in enumerate(missing_arg_names)
3295 ]
3296 arg_names = base_arg_names + missing_arg_names
3297 graph_function = ConcreteFunction(
-> 3298 func_graph_module.func_graph_from_py_func(
3299 self._name,
3300 self._python_function,
3301 args,
3302 kwargs,
3303 self.input_signature,
3304 autograph=self._autograph,
3305 autograph_options=self._autograph_options,
3306 arg_names=arg_names,
3307 override_flat_arg_shapes=override_flat_arg_shapes,
3308 capture_by_value=self._capture_by_value),
3309 self._function_attributes,
3310 function_spec=self.function_spec,
3311 # Tell the ConcreteFunction to clean up its graph once it goes out of
3312 # scope. This is not the default behavior since it gets used in some
3313 # places (like Keras) where the FuncGraph lives longer than the
3314 # ConcreteFunction.
3315 shared_func_graph=False)
3316 return graph_function
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\func_graph.py:1007, in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1004 else:
1005 _, original_func = tf_decorator.unwrap(python_func)
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
1010 # TensorArrays and `None`s.
1011 func_outputs = nest.map_structure(convert, func_outputs,
1012 expand_composites=True)
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\eager\def_function.py:668, in Function._defun_with_scope.<locals>.wrapped_fn(*args, **kwds)
664 with default_graph._variable_creator_scope(scope, priority=50): # pylint: disable=protected-access
665 # __wrapped__ allows AutoGraph to swap in a converted function. We give
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
File ~\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\func_graph.py:994, in func_graph_from_py_func.<locals>.wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\training.py:853 train_function *
return step_function(self, iterator)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\training.py:835 run_step **
outputs = model.train_step(data)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\training.py:788 train_step
loss = self.compiled_loss(
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\losses.py:141 __call__
losses = call_fn(y_true, y_pred)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\losses.py:1665 categorical_crossentropy
return backend.categorical_crossentropy(
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
C:\Users\tm2\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (128, 1, 44) and (128, 128, 44) are incompatible

Evaluate ROC AUC for Keras sequential multiclass model

I want to evaluate the ROC AUC for my multiclass sequential Keras model using the multiclass_roc_auc_score function. My code raised ValueError: Shapes (None, 1) and (None, 4) are incompatible.
I want to perform multiclass classification:
class MulticlassTruePositives(tf.keras.metrics.Metric):
def __init__(self, name='multiclass_true_positives', **kwargs):
super(MulticlassTruePositives, self).__init__(name=name, **kwargs)
self.true_positives = self.add_weight(name='tp', initializer='zeros')
def update_state(self, y_true, y_pred, sample_weight=None):
y_pred = tf.reshape(tf.argmax(y_pred, axis=1), shape=(-1, 1))
values = tf.cast(y_true, 'int32') == tf.cast(y_pred, 'int32')
values = tf.cast(values, 'float32')
if sample_weight is not None:
sample_weight = tf.cast(sample_weight, 'float32')
values = tf.multiply(values, sample_weight)
self.true_positives.assign_add(tf.reduce_sum(values))
def result(self):
return self.true_positives
def reset_states(self):
# The state of the metric will be reset at the start of each epoch.
self.true_positives.assign(0.)
I compile the Keras model with the metrics:
# Report the AUC of a model outputting a probability.
hypermodel.compile(optimizer='sgd',
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=[tf.keras.metrics.AUC(), MulticlassTruePositives()])
I implement Keras callback that plots the ROC curve and Confusion Matrix to a folder:
class PerformanceVisualizationCallback(Callback):
def __init__(self, model, test_data, image_dir):
super().__init__()
self.model = model
self.test_data = test_data
os.makedirs(image_dir, exist_ok=True)
self.image_dir = image_dir
def on_epoch_end(self, epoch, logs={}):
y_pred = np.asarray(self.model.predict(self.test_data[0]))
y_true = self.test_data[1]
y_pred_class = np.argmax(y_pred, axis=1)
# plot and save confusion matrix
fig, ax = plt.subplots(figsize=(16,12))
plot_confusion_matrix(y_true, y_pred_class, ax=ax)
fig.savefig(os.path.join(self.image_dir, f'confusion_matrix_epoch_{epoch}'))
# plot and save roc curve
fig, ax = plt.subplots(figsize=(16,12))
plot_roc(y_true, y_pred, ax=ax)
fig.savefig(os.path.join(self.image_dir, f'roc_curve_epoch_{epoch}'))
performance_viz_cbk = PerformanceVisualizationCallback(
model=model,
test_data=X_test,
image_dir='perorfmance_charts')
history = hypermodel.fit(x=X_train,
y=y_train,
epochs=5,
callbacks=[performance_viz_cbk])
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_17/963709483.py in <module>
2 y=y_train,
3 epochs=5,
----> 4 callbacks=[performance_viz_cbk])
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
922 # In this case we have not created variables on the first call. So we can
923 # run the first trace but we should fail if variables are created.
--> 924 results = self._stateful_fn(*args, **kwds)
925 if self._created_variables and not ALLOW_DYNAMIC_VARIABLE_CREATION:
926 raise ValueError("Creating variables on a non-first call to a function"
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
3036 with self._lock:
3037 (graph_function,
-> 3038 filtered_flat_args) = self._maybe_define_function(args, kwargs)
3039 return graph_function._call_flat(
3040 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3458 call_context_key in self._function_cache.missed):
3459 return self._define_function_with_shape_relaxation(
-> 3460 args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3461
3462 self._function_cache.missed.add(call_context_key)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _define_function_with_shape_relaxation(self, args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3380
3381 graph_function = self._create_graph_function(
-> 3382 args, kwargs, override_flat_arg_shapes=relaxed_arg_shapes)
3383 self._function_cache.arg_relaxed[rank_only_cache_key] = graph_function
3384
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3306 arg_names=arg_names,
3307 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3308 capture_by_value=self._capture_by_value),
3309 self._function_attributes,
3310 function_spec=self.function_spec,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1005 _, original_func = tf_decorator.unwrap(python_func)
1006
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1008
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
670
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:789 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/opt/conda/lib/python3.7/site-packages/keras/engine/compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:141 __call__
losses = call_fn(y_true, y_pred)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/losses.py:1666 categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/keras/backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 4) are incompatible
I took time to make your display graph make sense with my sample data.
Sample: First of all you need to create dimension output matching to your display calculations matrix see loss Fn, Optimizer, Matrix, and the network output. Then you need to create senses of predict data, they are not allowed multiple input value then I take time to make it significant as a sample. Max or Sum is significant
Sum of all Max is not significant as all priority in the my queue.
import os
from os.path import exists
import tensorflow as tf
import tensorflow_text as tft
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.svm import SVC
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
list_accuracy = []
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
if not exists(checkpoint_dir) :
os.mkdir(checkpoint_dir)
print("Create directory: " + checkpoint_dir)
input_word = tf.constant(' \'Cause it\'s easy as an ice cream sundae Slipping outta your hand into the dirt Easy as an ice cream sundae Every dancer gets a little hurt Easy as an ice cream sundae Slipping outta your hand into the dirt Easy as an ice cream sundae Every dancer gets a little hurt Easy as an ice cream sundae Oh, easy as an ice cream sundae ')
dataset = tf.data.Dataset.from_tensors( tf.strings.bytes_split(input_word) )
window_size = 6
dataset = dataset.map( lambda x: tft.sliding_window(x, width=window_size, axis=0) ).flat_map(tf.data.Dataset.from_tensor_slices)
dataset = dataset.batch(1)
list_word = []
label = []
vocab = [ "a", "b", "c", "d", "e", "f", "g", "h", "I", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "_" ]
vocab_hot = [ "ice" ]
layer = tf.keras.layers.StringLookup(vocabulary=vocab)
layer_hot = tf.keras.layers.StringLookup(vocabulary=vocab_hot)
for example in dataset.take(200):
sequences_mapping_string = layer(example[0])
sequences_mapping_string = tf.constant( sequences_mapping_string, shape=(1, 6) )
list_word.append(sequences_mapping_string.numpy())
sequences_mapping_string = tf.reduce_sum(layer_hot( example[0][0] + example[0][1] + example[0][2] ))
sequences_mapping_string = tf.constant( sequences_mapping_string, shape=(1, 1) )
label.append(sequences_mapping_string.numpy())
list_word = tf.constant(list_word, shape=(200, 1, 6, 1), dtype=tf.int64)
label = tf.constant(label, shape=(200, 1, 1, 1), dtype=tf.int64)
dataset = tf.data.Dataset.from_tensor_slices((list_word, label))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MulticlassTruePositives(tf.keras.metrics.Metric):
def __init__(self, name='multiclass_true_positives', **kwargs):
super(MulticlassTruePositives, self).__init__(name=name, **kwargs)
self.true_positives = self.add_weight(name='tp', initializer='zeros')
def update_state(self, y_true, y_pred, sample_weight=None):
y_pred = tf.reshape(tf.argmax(y_pred, axis=1), shape=(-1, 1))
values = tf.cast(y_true, 'int32') == tf.cast(y_pred, 'int32')
values = tf.cast(values, 'float32')
if sample_weight is not None:
sample_weight = tf.cast(sample_weight, 'float32')
values = tf.multiply(values, sample_weight)
self.true_positives.assign_add(tf.reduce_sum(values))
def result(self):
return self.true_positives
def reset_state(self):
# The state of the metric will be reset at the start of each epoch.
self.true_positives.assign(0.)
class MyLSTMLayer( tf.keras.layers.LSTM ):
def __init__(self, units, return_sequences, return_state):
super(MyLSTMLayer, self).__init__( units, return_sequences=True, return_state=False )
self.num_units = units
def build(self, input_shape):
self.kernel = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.num_units])
def call(self, inputs):
return tf.matmul(inputs, self.kernel)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
mycustomlayer = MyLSTMLayer( 64, True, False )
mycustomlayer_2 = MyLSTMLayer( 16, True, False )
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=(6, 1)),
tf.keras.layers.Embedding(1000, 128, input_length=1),
tf.keras.layers.Reshape(( 6, 128 )),
tf.keras.layers.SpatialDropout1D( rate = 0.4 ),
tf.keras.layers.Conv1D(32, 6, activation="relu"),
tf.keras.layers.MaxPooling1D(strides=1, pool_size=1),
### LSTM
mycustomlayer,
tf.keras.layers.Reshape(( 1, 1, 64 )),
tf.keras.layers.UpSampling2D( size=(4, 4), data_format=None, interpolation='nearest' ),
tf.keras.layers.Conv1D(16, 3, activation="relu"),
tf.keras.layers.Reshape(( 8, 16 )),
tf.keras.layers.MaxPooling1D(),
tf.keras.layers.GlobalMaxPooling1D(),
### LSTM
tf.keras.layers.Reshape(( 1, 16 )),
mycustomlayer_2,
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Dense(128, activation="relu"),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(1),
], name="MyModelClassification")
model.build()
model.summary()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Callback
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class PerformanceVisualizationCallback(tf.keras.callbacks.Callback):
def __init__(self, model, test_data, image_dir):
super().__init__()
self.model = model
self.test_data = test_data
self.test_data = tf.constant( self.test_data, shape=(20, 1, 6, 1) )
os.makedirs(image_dir, exist_ok=True)
self.image_dir = image_dir
def on_epoch_end(self, epoch, logs={}):
y_pred = tf.constant(self.model.predict(self.test_data[0])).numpy()
y_true = self.test_data[1]
y_pred_class = tf.math.argmax(y_pred, axis=1).numpy()
clf = SVC(random_state=0)
clf.fit(tf.constant( self.test_data, shape=(20, 6) ).numpy(), tf.cast( tf.linspace( 0, 19, 20, name='linspace', axis=0 ), dtype=tf.int64 ).numpy())
predictions = clf.predict(tf.constant( self.test_data, shape=(20, 6) ).numpy())
cm = sklearn.metrics.confusion_matrix(
[tf.math.argmax(self.test_data[1], axis=1).numpy()[0], tf.math.argmax(self.test_data[2], axis=1).numpy()[0],
tf.math.argmax(self.test_data[3], axis=1).numpy()[0], tf.math.argmax(self.test_data[4], axis=1).numpy()[0], tf.math.argmax(self.test_data[5], axis=1).numpy()[0]],
[1, 2, 3, 4, 5], labels=clf.classes_)
disp = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
disp.plot()
plt.show()
# fig.savefig(os.path.join(self.image_dir, f'confusion_matrix_epoch_{epoch}'))
clf = sklearn.svm.SVC(random_state=0)
clf.fit(tf.constant( self.test_data, shape=(20, 6) ).numpy(), tf.linspace( 0, 19, 20, name='linspace', axis=0 ).numpy())
if (epoch <= 2):
list_accuracy.append( logs['accuracy'] )
if (epoch == 2):
fpr, tpr, thresholds = sklearn.metrics.roc_curve([0, 0, 1, 1], [0, list_accuracy[0], list_accuracy[1], list_accuracy[2]])
roc_auc = sklearn.metrics.auc(fpr, tpr)
display = sklearn.metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='example estimator')
display.plot()
plt.show()
# fig.savefig(os.path.join(self.image_dir, f'roc_curve_epoch_{epoch}'))
list_word = []
label = []
test_dataset = tf.data.Dataset.from_tensors( tf.strings.bytes_split(input_word) )
test_dataset = test_dataset.map( lambda x: tft.sliding_window(x, width=window_size, axis=0) ).flat_map(tf.data.Dataset.from_tensor_slices)
test_dataset = test_dataset.batch(1)
for example in test_dataset.take(20):
sequences_mapping_string = layer(example[0])
sequences_mapping_string = tf.constant( sequences_mapping_string, shape=(1, 6) )
list_word.append(sequences_mapping_string.numpy())
sequences_mapping_string = tf.reduce_sum(layer_hot( example[0][0] + example[0][1] + example[0][2] ))
sequences_mapping_string = tf.constant( sequences_mapping_string, shape=(1, 1) )
label.append(sequences_mapping_string.numpy())
list_word = tf.constant(list_word, shape=(20, 1, 6, 1), dtype=tf.int64)
label = tf.constant(label, shape=(20, 1, 1, 1), dtype=tf.int64)
test_dataset = tf.data.Dataset.from_tensor_slices((list_word, label))
performance_viz_cbk = PerformanceVisualizationCallback(
model=model,
test_data=list_word,
image_dir='c:\perorfmance_charts')
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.SGD(
learning_rate=0.000001,
momentum=0.5,
nesterov=True,
name='SGD',
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Report the AUC of a model outputting a probability.
model.compile(optimizer=optimizer, loss=lossfn,
metrics=['accuracy', tf.keras.metrics.AUC(), MulticlassTruePositives()])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: FileWriter
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
if exists(checkpoint_path) :
model.load_weights(checkpoint_path)
print("model load: " + checkpoint_path)
input("Press Any Key!")
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, batch_size=100, epochs=3, callbacks=[performance_viz_cbk] )

TFBertForSequenceClassification for multi label classification

I am trying to fine-tune a bert model for multi-label classification.
here is how my data looks like. I have put the entire code on this colab notebook
({'input_ids': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([ 2, 8318, 1379, 7892, 2791, 20630, 1, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)>,
'attention_mask': <tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>},
<tf.Tensor: shape=(7,), dtype=int64, numpy=array([1, 0, 0, 0, 0, 0, 0])>)
The first element is the id,
The second element corresponds to the attention_masks
the third one are the labels - here I have 7 lables.
First effort:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = 7
from transformers import TFBertForSequenceClassification, BertConfig
model = TFBertForSequenceClassification.from_pretrained(
MODEL_NAME_OR_PATH,
config=BertConfig.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS, problem_type="multi_label_classification")
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
which ends up with the following error
InvalidArgumentError Traceback (most recent call last)
<ipython-input-48-4408a1f17fbe> in <module>()
10 loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
11 model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
---> 12 history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
13
14
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-48-4408a1f17fbe>", line 12, in <module>
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1156, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 4086, in sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
[[{{node Equal}}]] [Op:__inference_train_function_187978]
Second Effort inspired by this piece of code
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
class TFBertForMultilabelClassification(TFBertPreTrainedModel):
def __init__(self, config, *inputs, **kwargs):
super(TFBertForMultilabelClassification, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, name='bert')
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(config.num_labels,
kernel_initializer='random_normal', #get_initializer(config.initializer_range),
name='classifier',
activation='sigmoid')
def call(self, inputs, **kwargs):
outputs = self.bert(inputs, **kwargs)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
logits = self.classifier(pooled_output)
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
return outputs # logits, (hidden_states), (attentions)
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = len(y_train[0])
model = TFBertForMultilabelClassification.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,epsilon=1e-08, clipnorm=1)
# we do not have one-hot vectors, we can use sparce categorical cross entropy and accuracy
loss = tf.keras.losses.BinaryCrossentropy()
metric = tf.keras.metrics.CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
returns the following error
InvalidArgumentError Traceback (most recent call last)
<ipython-input-49-8aa1173bef76> in <module>()
4 metric = tf.keras.metrics.CategoricalAccuracy()
5 model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
----> 6 history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'Equal' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-49-8aa1173bef76>", line 6, in <module>
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1156, in train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 729, in update_state
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 4086, in sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
Node: 'Equal'
required broadcastable shapes
[[{{node Equal}}]] [Op:__inference_train_function_214932]
I hope I believe given major changes both in tf2 and (TF-based) huggingface transformers
UPDATE
Here is the entire code with a dummy dataset; the whole thing is also available on this colab notebook
load the libraries
import os
import pandas as pd
import numpy as np
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from transformers import BertTokenizer
import tensorflow as tf
make a dummy data
x_train = ['هان از وقتی که زفتم مدرسه',
'معاویه برادر شمر',
'وقتی که از پنجره سرشرو میاره بیرون دالی میکنه',
'هر دو سحرند این کجا و آن کجا']
y_train = [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0]]
x_test, x_valid = x_train, x_train
y_test, y_valid = y_train, y_train
add the configs
# general config
MAX_LEN = 128
batch_size = 32
TRAIN_BATCH_SIZE = batch_size
VALID_BATCH_SIZE = batch_size
TEST_BATCH_SIZE = batch_size
EPOCHS = 3
EEVERY_EPOCH = 1000
LEARNING_RATE = 2e-5
CLIP = 0.0
make the data huggingface friendly
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
MAX_LEN = 128
def tokenize_sentences(sentences, tokenizer, max_seq_len = 128):
tokenized_sentences = []
for sentence in tqdm(sentences):
tokenized_sentence = tokenizer.encode(
sentence, # Sentence to encode.
add_special_tokens = True, # Add '[CLS]' and '[SEP]'
max_length = max_seq_len, # Truncate all sentences.
)
tokenized_sentences.append(tokenized_sentence)
return tokenized_sentences
def create_attention_masks(tokenized_and_padded_sentences):
attention_masks = []
for sentence in tokenized_and_padded_sentences:
att_mask = [int(token_id > 0) for token_id in sentence]
attention_masks.append(att_mask)
return np.asarray(attention_masks)
train_ids = tokenize_sentences(x_train, tokenizer, max_seq_len = 128)
train_ids = pad_sequences(train_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
train_masks = create_attention_masks(train_ids)
valid_ids = tokenize_sentences(x_valid, tokenizer, max_seq_len = 128)
valid_ids = pad_sequences(valid_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
valid_masks = create_attention_masks(valid_ids)
test_ids = tokenize_sentences(x_test, tokenizer, max_seq_len = 128)
test_ids = pad_sequences(test_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
test_masks = create_attention_masks(test_ids)
create the datasets
def create_dataset(ids, masks, labels):
def gen():
for i in range(len(ids)):
yield (
{
"input_ids": ids[i],
"attention_mask": masks[i]
},
labels[i],
)
return tf.data.Dataset.from_generator(
gen,
({"input_ids": tf.int32, "attention_mask": tf.int32}, tf.int64),
(
{
"input_ids": tf.TensorShape([None]),
"attention_mask": tf.TensorShape([None])
},
tf.TensorShape([None]),
),
)
train_dataset = create_dataset(train_ids, train_masks, y_train)
valid_dataset = create_dataset(valid_ids, valid_masks, y_valid)
test_dataset = create_dataset(test_ids, test_masks, y_test)
that is how the data looks like
for item in train_dataset.take(1):
print(item)
Approach 1
class TFBertForMultilabelClassification(TFBertPreTrainedModel):
def __init__(self, config, *inputs, **kwargs):
super(TFBertForMultilabelClassification, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, name='bert')
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(config.num_labels,
kernel_initializer='random_normal', #get_initializer(config.initializer_range),
name='classifier',
activation='sigmoid')
def call(self, inputs, **kwargs):
outputs = self.bert(inputs, **kwargs)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
logits = self.classifier(pooled_output)
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
return outputs # logits, (hidden_states), (attentions)
NUM_LABELS = len(y_train[0])
model = TFBertForMultilabelClassification.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,epsilon=1e-08, clipnorm=1)
# we do not have one-hot vectors, we can use sparce categorical cross entropy and accuracy
loss = tf.keras.losses.BinaryCrossentropy()
metric = tf.keras.metrics.CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
with an error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-36-8aa1173bef76> in <module>()
4 metric = tf.keras.metrics.CategoricalAccuracy()
5 model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
----> 6 history = model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
AttributeError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1145, in train_step
if list(y_pred.keys())[0] == "loss":
AttributeError: 'tuple' object has no attribute 'keys'
Approach 2:
MODEL_NAME_OR_PATH = 'HooshvareLab/bert-fa-base-uncased'
NUM_LABELS = 7
from transformers import TFBertForSequenceClassification, BertConfig
model = TFBertForSequenceClassification.from_pretrained(
MODEL_NAME_OR_PATH,
config=BertConfig.from_pretrained(MODEL_NAME_OR_PATH, num_labels=NUM_LABELS, problem_type="multi_label_classification")
)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset, epochs=1, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
and the error
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 1151, in train_step
loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1863, in sparse_categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/keras/backend.py", line 5203, in sparse_categorical_crossentropy
labels=target, logits=output)
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [128,7] and labels shape [7]
[[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_67923]

How to skip problematic hyperparameter combinations when tuning models using Keras Tuner?

When using Keras Tuner, there doesn't seem to be a way to allow the skipping of a problematic combination of hyperparams. For example, the number of filters in a Conv1D layer may not be compatible with all values of pool size in the following MaxPooling1D layer and thus lead to an error in model building. However, this may not be known before running the tuner. Once the tuner is run, this will lead to an error that will terminate the whole tuning process. Is there a way to skip any hyperparam combinations that result in an error?
Sample code:
def model_builder(hp):
model = Sequential()
model.add(
Embedding(
input_dim=hp.Int(
'vocab_size',
min_value=4000,
max_value=10000,
step=1000,
default=4000
),
output_dim=hp.Choice(
'embedding_dim',
values=[32, 64, 128, 256],
default=32
),
input_length=hp.Int(
'max_length',
min_value=50,
max_value=200,
step=10,
default=50
)
)
)
model.add(
Conv1D(
filters=hp.Choice(
'num_filters_1',
values=[32, 64],
default=32
),
kernel_size=hp.Choice(
'kernel_size_1',
values=[3, 5, 7, 9],
default=7
),
activation='relu'
)
)
model.add(
MaxPooling1D(
pool_size=hp.Choice(
'pool_size',
values=[3, 5],
default=5
)
)
)
model.add(
Conv1D(
filters=hp.Choice(
'num_filters_2',
values=[32, 64],
default=32
),
kernel_size=hp.Choice(
'kernel_size_2',
values=[3, 5, 7, 9],
default=7
),
activation='relu'
)
)
model.add(
GlobalMaxPooling1D()
)
model.add(
Dropout(
rate=hp.Float(
'dropout_1',
min_value=0.0,
max_value=0.5,
default=0.5,
step=0.05
)
)
)
model.add(
Dense(
units=hp.Int(
'units',
min_value=10,
max_value=100,
step=10,
default=10
),
kernel_regularizer=tf.keras.regularizers.l2(
hp.Float(
'regularizer_1',
min_value=1e-4,
max_value=1e-1,
sampling='LOG',
default=1e-2
)
),
activation='relu'
)
)
model.add(
Dropout(
hp.Float(
'dropout_2',
min_value=0.0,
max_value=0.5,
default=0.5,
step=0.05
)
)
)
model.add(
Dense(
1,
kernel_regularizer=tf.keras.regularizers.l2(
hp.Float(
'regularizer_2',
min_value=1e-4,
max_value=1e-1,
sampling='LOG',
default=1e-2
)
),
activation='sigmoid'
)
)
model.compile(
loss='binary_crossentropy',
optimizer=hp.Choice(
'optimizer',
values=['rmsprop', 'adam', 'sgd']
),
metrics=['accuracy']
)
return model
tuner = kt.Hyperband(
model_builder,
objective='val_accuracy',
max_epochs=20,
#factor=3,
directory='my_dir',
project_name='cec',
seed=seed
)
class ClearTrainingOutput(tf.keras.callbacks.Callback):
def on_train_end(*args, **kwargs):
IPython.display.clear_output(wait=True)
tuner.search(
X_train,
y_train,
epochs=20,
validation_data=(X_test, y_test),
callbacks=[ClearTrainingOutput()]
)
The error message:
Epoch 1/3
WARNING:tensorflow:Model was constructed with shape (None, 150) for input Tensor("embedding_input:0", shape=(None, 150), dtype=float32), but it was called on an input with incompatible shape (32, 50).
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-62-16a1eae457d8> in <module>
3 IPython.display.clear_output(wait=True)
4
----> 5 tuner.search(
6 X_train,
7 y_train,
~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/engine/base_tuner.py in search(self, *fit_args, **fit_kwargs)
128
129 self.on_trial_begin(trial)
--> 130 self.run_trial(trial, *fit_args, **fit_kwargs)
131 self.on_trial_end(trial)
132 self.on_search_end()
~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/tuners/hyperband.py in run_trial(self, trial, *fit_args, **fit_kwargs)
385 fit_kwargs['epochs'] = hp.values['tuner/epochs']
386 fit_kwargs['initial_epoch'] = hp.values['tuner/initial_epoch']
--> 387 super(Hyperband, self).run_trial(trial, *fit_args, **fit_kwargs)
388
389 def _build_model(self, hp):
~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/engine/multi_execution_tuner.py in run_trial(self, trial, *fit_args, **fit_kwargs)
94
95 model = self.hypermodel.build(trial.hyperparameters)
---> 96 history = model.fit(*fit_args, **copied_fit_kwargs)
97 for metric, epoch_values in history.history.items():
98 if self.oracle.objective.direction == 'min':
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
846 batch_size=batch_size):
847 callbacks.on_train_batch_begin(step)
--> 848 tmp_logs = train_function(iterator)
849 # Catch OutOfRangeError for Datasets of unknown size.
850 # This blocks until the batch has finished executing.
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
578 xla_context.Exit()
579 else:
--> 580 result = self._call(*args, **kwds)
581
582 if tracing_count == self._get_tracing_count():
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
625 # This is the first call of __call__, so we have to initialize.
626 initializers = []
--> 627 self._initialize(args, kwds, add_initializers_to=initializers)
628 finally:
629 # At this point we know that the initialization is complete (or less
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
503 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
504 self._concrete_stateful_fn = (
--> 505 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
506 *args, **kwds))
507
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2444 args, kwargs = None, None
2445 with self._lock:
-> 2446 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2447 return graph_function
2448
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2775
2776 self._function_cache.missed.add(call_context_key)
-> 2777 graph_function = self._create_graph_function(args, kwargs)
2778 self._function_cache.primary[cache_key] = graph_function
2779 return graph_function, args, kwargs
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2655 arg_names = base_arg_names + missing_arg_names
2656 graph_function = ConcreteFunction(
-> 2657 func_graph_module.func_graph_from_py_func(
2658 self._name,
2659 self._python_function,
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
979 _, original_func = tf_decorator.unwrap(python_func)
980
--> 981 func_outputs = python_func(*func_args, **func_kwargs)
982
983 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
439 # __wrapped__ allows AutoGraph to swap in a converted function. We give
440 # the function a weak reference to itself to avoid a reference cycle.
--> 441 return weak_wrapped_fn().__wrapped__(*args, **kwds)
442 weak_wrapped_fn = weakref.ref(wrapped_fn)
443
~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:531 train_step **
y_pred = self(x, training=True)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py:277 call
return super(Sequential, self).call(inputs, training=training, mask=mask)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:717 call
return self._run_internal_graph(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:888 _run_internal_graph
output_tensors = layer(computed_tensors, **kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py:207 call
outputs = self._convolution_op(inputs, self.kernel)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1106 __call__
return self.conv_op(inp, filter)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:638 __call__
return self.call(inp, filter)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:231 __call__
return self.conv_op(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:220 _conv1d
return conv1d(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574 new_func
return func(*args, **kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574 new_func
return func(*args, **kwargs)
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1655 conv1d
result = gen_nn_ops.conv2d(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/gen_nn_ops.py:965 conv2d
_, _, _op, _outputs = _op_def_library._apply_op_helper(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py:742 _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:3319 _create_op_internal
ret = Operation(
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1816 __init__
self._c_op = _create_c_op(self._graph, node_def, inputs,
/home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1657 _create_c_op
raise ValueError(str(e))
ValueError: Negative dimension size caused by subtracting 7 from 6 for '{{node sequential/conv1d_1/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential/conv1d_1/conv1d/ExpandDims, sequential/conv1d_1/conv1d/ExpandDims_1)' with input shapes: [32,1,6,32], [1,7,32,32].
I myself have been looking for a solution to this problem for a very long time and found it. Yes, not very elegant, but it works. I'll leave it here, maybe it will help someone else.
The point is to wrap the model construction in a try-except block and, if a Value Error occurs, build a highly simplified model. The important thing here is to create your own loss function that would return too large a loss value, which we could catch with our own callback function and stop training the model (code with an example for convolutional neural networks attached)
P.S. It would be possible to make your own loss function return NaN, and catch it with a ready-made callback function TerminateOnNaN(). But for some reason keras-tuner thinks that NaN < any number, and therefore it will give the value NaN for best val_loss
def invalid_loss(y_true, y_pred):
return keras.losses.BinaryCrossentropy()(y_true, y_pred) + 2000000
def invalid_model():
model = keras.Sequential()
model.add(layers.Input((input_shape)))
model.add(layers.Resizing(height=2, width=2))
model.add(layers.Conv2D(filters=1,
kernel_size=2,
activation='relu',
))
model.add(layers.GlobalMaxPooling2D())
model.add(layers.Dense(units=output_shape,
activation="sigmoid",
))
model.compile(optimizer="Adam",
loss=invalid_loss,
metrics=[metrics.BinaryAccuracy()])
return model
def build_model(hp):
try:
model = keras.Sequential()
model.add(layers.Input((input_shape)))
...
model.add(layers.Dense(units=output_shape,
activation=dense_activation))
model.compile(optimizer="Adam",
loss=losses.BinaryCrossentropy(),
metrics=[metrics.BinaryAccuracy()])
except ValueError:
model = invalid_model()
return model
And here is an example of your own callback, which would stop training so as not to waste time on a "invalid" model
class EarlyStoppingByLoss(keras.callbacks.Callback):
def __init__(self, max_loss):
self.max_loss = max_loss
def on_train_batch_end(self, batch, logs=None):
if logs["loss"] >= self.max_loss:
self.model.stop_training = True
You can also control oversized models (for example, if you use Flatten() when switching from convolutional layers to fully connected ones)
from keras.utils.layer_utils import count_params
class EarlyStoppingByModelSize(keras.callbacks.Callback):
def __init__(self, max_size):
self.max_size = max_size
def on_train_begin(self, logs=None):
trainable_count = count_params(self.model.trainable_weights)
if trainable_count > self.max_size:
self.model.stop_training = True
And, accordingly, at the end we add these callbacks to the list and use them when training the model
callbacks = []
callbacks.append(EarlyStoppingByLoss(900000))
callbacks.append(EarlyStoppingByModelSize(12000000))
tuner.search(x=x_train,
y=y_train,
epochs=epochs,
validation_data=(x_test, y_test),
callbacks=callbacks,
verbose=1,
batch_size=batch_size)

How to use multiple GPU for a DCGAN using Tensorflow 2.0 - RuntimeError: Replica-local variables may only be assigned in a replica context

I would like to develop a DCGAN with resolution of 256x256. To do so I need to use multiple GPU since only one it is not enough and it will probably take too much time.
I followed the procedure explained in the documentation at this link
https://www.tensorflow.org/beta/guide/distribute_strategy
At the top of the script I used
strategy = tf.distribute.MirroredStrategy()
Then inside the Generator, Discriminator, and Loss functions I used
with strategy.scope():
The error I get is:
RuntimeError: Replica-local variables may only be assigned in a replica context.
strategy = tf.distribute.MirroredStrategy()
path = '/my/dataset/path/'
file_paths = [f for f in glob.glob(path + "**/*.jpg", recursive=True)]
tensor_data = np.zeros((len(file_paths), 256, 256, 3)).astype('float32')
for i in range(len(file_paths)):
img_tensor = tf.image.decode_image(tf.io.read_file(file_paths[i]))
tensor_data[i] = img_tensor
for i in range(tensor_data.shape[0]):
tensor_data[i] = ((tensor_data[i] - 127.5) / 127.5)
BUFFER_SIZE = len(file_paths)
BATCH_SIZE = 256
train_dataset = tf.data.Dataset.from_tensor_slices(tensor_data).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
def make_generator_model():
with strategy.scope():
model = tf.keras.Sequential()
model.add(layers.Dense(64*64*1536, use_bias=False, input_shape=(100,)))
model.add(layers.BatchNormalization())
model.add(layers.LeakyReLU())
model.add(layers.Reshape((64, 64, 1536)))
assert model.output_shape == (None, 64, 64, 1536) # Note: None is the batch size
model.add(layers.Conv2DTranspose(1536, (5, 5), strides=(1, 1), padding='same', use_bias=False))
assert model.output_shape == (None, 64, 64, 1536)
model.add(layers.BatchNormalization())
model.add(layers.LeakyReLU())
model.add(layers.Conv2DTranspose(768, (5, 5), strides=(2, 2), padding='same', use_bias=False))
assert model.output_shape == (None, 128, 128, 768)
model.add(layers.BatchNormalization())
model.add(layers.LeakyReLU())
model.add(layers.Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
assert model.output_shape == (None, 256, 256, 3)
return model
generator = make_generator_model()
noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)
sample = generated_image[0, :, :, :];
sample = tf.cast(sample, tf.int32)
plt.imshow(sample, cmap=None)
def make_discriminator_model():
with strategy.scope():
model = tf.keras.Sequential()
model.add(layers.Conv2D(256, (5, 5), strides=(2, 2), padding='same', input_shape=[256, 256, 3]))
model.add(layers.LeakyReLU())
model.add(layers.Dropout(0.3))
model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
model.add(layers.LeakyReLU())
model.add(layers.Dropout(0.3))
model.add(layers.Flatten())
model.add(layers.Dense(1))
return model
discriminator = make_discriminator_model()
decision = discriminator(generated_image)
print (decision)
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
def discriminator_loss(real_output, fake_output):
with strategy.scope():
real_loss = cross_entropy(tf.ones_like(real_output), real_output)
fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
total_loss = real_loss + fake_loss
return total_loss
def generator_loss(fake_output):
with strategy.scope():
return cross_entropy(tf.ones_like(fake_output), fake_output)
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)
checkpoint_dir = './training_checkpoints/'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
discriminator_optimizer=discriminator_optimizer,
generator=generator,
discriminator=discriminator)
EPOCHS = 2000
noise_dim = 100
num_examples_to_generate = 16
seed = tf.random.normal([num_examples_to_generate, noise_dim])
# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
#tf.function
def train_step(images):
noise = tf.random.normal([BATCH_SIZE, noise_dim])
with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
generated_images = generator(noise, training=True)
real_output = discriminator(images, training=True)
fake_output = discriminator(generated_images, training=True)
gen_loss = generator_loss(fake_output)
disc_loss = discriminator_loss(real_output, fake_output)
gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
def train(dataset, epochs):
for epoch in range(epochs):
start = time.time()
for image_batch in dataset:
train_step(image_batch)
# Produce images for the GIF as we go
display.clear_output(wait=True)
generate_and_save_images(generator,
epoch + 1,
seed)
# Save the model every 15 epochs
os.makedirs(os.path.dirname(checkpoint_prefix), exist_ok=True)
if (epoch + 1) % 50 == 0:
checkpoint.save(file_prefix = checkpoint_prefix)
print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
# Generate after the final epoch
display.clear_output(wait=True)
generate_and_save_images(generator,epochs,seed)
def generate_and_save_images(model, epoch, test_input):
# Notice `training` is set to False.
# This is so all layers run in inference mode (batchnorm).
predictions = model(test_input, training=False)
fig = plt.figure(figsize=(4,4))
for i in range(predictions.shape[0]):
plt.subplot(8, 8, i+1)
sample = predictions[i, :, :, :] * 127.5 + 127.5
sample = tf.cast(sample, tf.int32)
plt.imshow(sample, cmap=None)
plt.axis('off')
filename = './screens/eye-256x256/1/image_at_epoch_{:04d}.png'
os.makedirs(os.path.dirname(filename), exist_ok=True)
if (epoch + 1) % 10 == 0:
plt.savefig(filename.format(epoch))
plt.show()
get_ipython().run_cell_magic('time', '', 'train(train_dataset, EPOCHS)')
The error is the following
Executing op ExperimentalRebatchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ExperimentalAutoShardDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MultiDeviceIterator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MultiDeviceIteratorInit in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MultiDeviceIteratorToStringHandle in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op GeneratorDataset in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op GeneratorDataset in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AnonymousIteratorV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op MakeIterator in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op AnonymousIteratorV2 in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op MakeIterator in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op IteratorGetNextSync in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op IteratorGetNextSync in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op DestroyResourceOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op DeleteIterator in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op DeleteIterator in device /job:localhost/replica:0/task:0/device:GPU:0
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-20-88a9879432c7> in train(dataset, epochs)
4
5 for image_batch in dataset:
----> 6 train_step(image_batch)
7
8 # Produce images for the GIF as we go
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
414 # This is the first call of __call__, so we have to initialize.
415 initializer_map = {}
--> 416 self._initialize(args, kwds, add_initializers_to=initializer_map)
417 if self._created_variables:
418 try:
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
357 self._concrete_stateful_fn = (
358 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 359 *args, **kwds))
360
361 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1358 if self.input_signature:
1359 args, kwargs = None, None
-> 1360 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1361 return graph_function
1362
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
1646 graph_function = self._function_cache.primary.get(cache_key, None)
1647 if graph_function is None:
-> 1648 graph_function = self._create_graph_function(args, kwargs)
1649 self._function_cache.primary[cache_key] = graph_function
1650 return graph_function, args, kwargs
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
1539 arg_names=arg_names,
1540 override_flat_arg_shapes=override_flat_arg_shapes,
-> 1541 capture_by_value=self._capture_by_value),
1542 self._function_attributes)
1543
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
714 converted_func)
715
--> 716 func_outputs = python_func(*func_args, **func_kwargs)
717
718 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.5/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
307 # __wrapped__ allows AutoGraph to swap in a converted function. We give
308 # the function a weak reference to itself to avoid a reference cycle.
--> 309 return weak_wrapped_fn().__wrapped__(*args, **kwds)
310 weak_wrapped_fn = weakref.ref(wrapped_fn)
311
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
704 except Exception as e: # pylint:disable=broad-except
705 if hasattr(e, "ag_error_metadata"):
--> 706 raise e.ag_error_metadata.to_exception(type(e))
707 else:
708 raise
RuntimeError: in converted code:
<ipython-input-19-d2ffe8a85706>:9 train_step *
generated_images = generator(noise, training=True)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py:667 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/sequential.py:248 call
return super(Sequential, self).call(inputs, training=training, mask=mask)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/network.py:753 call
return self._run_internal_graph(inputs, training=training, mask=mask)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/network.py:895 _run_internal_graph
output_tensors = layer(computed_tensors, **kwargs)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py:667 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/normalization.py:782 call
self.add_update(mean_update)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py:507 new_func
return func(*args, **kwargs)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py:1095 add_update
update()
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/normalization.py:775 mean_update
return tf_utils.smart_cond(training, true_branch, false_branch)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/utils/tf_utils.py:58 smart_cond
pred, true_fn=true_fn, false_fn=false_fn, name=name)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/smart_cond.py:54 smart_cond
return true_fn()
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/normalization.py:773 <lambda>
true_branch = lambda: _do_update(self.moving_mean, new_mean)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/normalization.py:769 _do_update
inputs_size)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/normalization.py:458 _assign_moving_average
return state_ops.assign_sub(variable, update_delta, name=scope)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/state_ops.py:164 assign_sub
return ref.assign_sub(value)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/distribute/values.py:1394 assign_sub
_assert_replica_context(self._distribute_strategy)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/distribute/values.py:1381 _assert_replica_context
"Replica-local variables may only be assigned in a replica context.")
RuntimeError: Replica-local variables may only be assigned in a replica context.
You need to distribute your dataset aslo, for details please refer to this URL.
- https://www.tensorflow.org/beta/tutorials/distribute/training_loops
train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
every part of your model creates under strategy scope like optimizer also.
with strategy.scope():
optimizer = tf.keras.optimizers.Adam()