Why am I getting the "'NoneType' object is not callable" error when I apply model.fit in TensorFlow?

I'm getting the following error when I apply model.fit. I've described my model and other variables after the error.
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
in
----> 1 results = model.fit({'title_input': sequences_matrix_train_t, 'body_input': sequences_matrix_train_b},
      2                     {'main_output': y_train, 'aux_output': y_train},
      3                     validation_data=[{'title_input': sequences_matrix_test_t, 'body_input': sequences_matrix_test_b},
      4                                      {'main_output': y_test, 'aux_output': y_test}],
      5                     epochs=5, batch_size=800)

~/Library/Python/3.8/lib/python/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
    106 def _method_wrapper(self, *args, **kwargs):
    107   if not self._in_multi_worker_mode():  # pylint: disable=protected-access
--> 108     return method(self, *args, **kwargs)
    109
    110   # Running inside run_distribute_coordinator already.

~/Library/Python/3.8/lib/python/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1096         batch_size=batch_size):
   1097       callbacks.on_train_batch_begin(step)
-> 1098       tmp_logs = train_function(iterator)
   1099       if data_handler.should_sync:
   1100         context.async_wait()

~/Library/Python/3.8/lib/python/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
    778     else:
    779       compiler = "nonXla"
--> 780       result = self._call(*args, **kwds)
    781
    782     new_tracing_count = self._get_tracing_count()

~/Library/Python/3.8/lib/python/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
    805       # In this case we have created variables on the first call, so we run the
    806       # defunned version which is guaranteed to never create variables.
--> 807       return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    808     elif self._stateful_fn is not None:
    809       # Release the lock early so that multiple threads can perform the call

TypeError: 'NoneType' object is not callable
I get the above error when I fit my model. The model is described below:
def RNN():
    # RNN part for the title column
    title_input = Input(name='title_input', shape=[max_len_of_title_tokens])
    title_Embed = Embedding(vocab_len_t+1, 2000, input_length=max_len_of_title_tokens, mask_zero=True, name='title_Embed')(title_input)
    gru_out_t = GRU(300)(title_Embed)
    # Using a Dense layer for the auxiliary output to smooth the effects of the GRU
    auxiliary_output = Dense(10, activation='sigmoid', name='aux_output')(gru_out_t)
    # RNN part for the body column
    body_input = Input(name='body_input', shape=[max_len_of_body_tokens])
    body_Embed = Embedding(vocab_len_b+1, 170, input_length=max_len_of_body_tokens, mask_zero=True, name='body_Embed')(body_input)
    gru_out_b = GRU(200)(body_Embed)
    # Combining these two
    combined = concatenate([gru_out_t, gru_out_b])
    # Feeding the combined output to the remaining two dense layers
    dense1 = Dense(400, activation='relu')(combined)
    dropout = Dropout(0.5)(dense1)
    batchNormalizer = BatchNormalization()(dropout)
    dense2 = Dense(150, activation='relu')(batchNormalizer)
    main_output = Dense(10, activation='sigmoid', name='main_output')(dense2)
    model = Model(inputs=[title_input, body_input], outputs=[main_output, auxiliary_output])
    return model
# The RNN function ends here; the code below is outside it.
model = RNN()  # this 'model' variable is the one I call 'fit' on.

# Here's how I'm applying the fit method:
results = model.fit({'title_input': sequences_matrix_train_t, 'body_input': sequences_matrix_train_b},
                    {'main_output': y_train, 'aux_output': y_train},
                    validation_data=[{'title_input': sequences_matrix_test_t, 'body_input': sequences_matrix_test_b},
                                     {'main_output': y_test, 'aux_output': y_test}],
                    epochs=5, batch_size=800)
Can someone please look over my error and help me figure out what's causing it?
Here's my jupyter notebook for more info – https://github.com/datares/teamRed/blob/main/post_classification.ipynb
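For reference, a two-output functional model like the one above is normally compiled before fit is called. The post doesn't show that step, so the following is only a minimal sketch of what it might look like; the optimizer, losses and loss weights here are illustrative assumptions, not taken from the question:
# Hypothetical compile step for the two-output model above.
# Optimizer, losses and loss weights are assumptions, not from the post.
model.compile(optimizer='adam',
              loss={'main_output': 'binary_crossentropy',
                    'aux_output': 'binary_crossentropy'},
              loss_weights={'main_output': 1.0, 'aux_output': 0.2},
              metrics=['accuracy'])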

Related

unexpected keyword argument 'sample_weight' when sub-classing the TensorFlow loss class (categorical_crossentropy) to create a weighted loss function

I'm struggling to get a sub-classed loss function to work in TensorFlow (2.2.0).
I initially tried this code (which I know has worked for others; see https://github.com/keras-team/keras/issues/2115#issuecomment-530762739):
import tensorflow.keras.backend as K
from tensorflow.keras.losses import CategoricalCrossentropy

class WeightedCategoricalCrossentropy(CategoricalCrossentropy):

    def __init__(self, cost_mat, name='weighted_categorical_crossentropy', **kwargs):
        assert(cost_mat.ndim == 2)
        assert(cost_mat.shape[0] == cost_mat.shape[1])
        super().__init__(name=name, **kwargs)
        self.cost_mat = K.cast_to_floatx(cost_mat)

    def __call__(self, y_true, y_pred):
        return super().__call__(
            y_true=y_true,
            y_pred=y_pred,
            sample_weight=get_sample_weights(y_true, y_pred, self.cost_mat),
        )

def get_sample_weights(y_true, y_pred, cost_m):
    num_classes = len(cost_m)
    y_pred.shape.assert_has_rank(2)
    y_pred.shape[1].assert_is_compatible_with(num_classes)
    y_pred.shape.assert_is_compatible_with(y_true.shape)
    y_pred = K.one_hot(K.argmax(y_pred), num_classes)
    y_true_nk1 = K.expand_dims(y_true, 2)
    y_pred_n1k = K.expand_dims(y_pred, 1)
    cost_m_1kk = K.expand_dims(cost_m, 0)
    sample_weights_nkk = cost_m_1kk * y_true_nk1 * y_pred_n1k
    sample_weights_n = K.sum(sample_weights_nkk, axis=[1, 2])
    return sample_weights_n
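In effect, get_sample_weights picks, for each sample, the cost-matrix entry indexed by the true class and the (argmax) predicted class. A small NumPy sketch of that reading, with made-up values:
import numpy as np

cost_m = np.array([[1., 2.],
                   [3., 1.]])
y_true = np.array([[1., 0.]])    # true class 0 (one-hot)
y_pred = np.array([[0.2, 0.8]])  # argmax -> predicted class 1

y_pred_oh = np.eye(2)[y_pred.argmax(axis=1)]            # one-hot of the argmax prediction
# sum_k sum_j cost_m[k, j] * y_true[n, k] * y_pred_oh[n, j]
w = np.einsum('kj,nk,nj->n', cost_m, y_true, y_pred_oh)
print(w)  # [2.] == cost_m[0, 1]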
Used as follows:
model.compile(optimizer='adam',
              loss={'simple_Class': 'categorical_crossentropy',
                    'soundClass': 'binary_crossentropy',
                    'auxiliary_soundClass': 'binary_crossentropy',
                    'auxiliary_class_training': WeightedCategoricalCrossentropy(cost_matrix),
                    'class_training': WeightedCategoricalCrossentropy(cost_matrix)},
              loss_weights={'simple_Class': 1.0,
                            'soundClass': 1.0,
                            'auxiliary_soundClass': 0.7,
                            'auxiliary_class_training': 0.7,
                            'class_training': 0.4})
(where cost_matrix is a 2-dimensional NumPy array). Training is run through model.fit() with batch_size=512.
However, this results in the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-21-3428d6d8967a> in <module>()
82 'class_training': class_lables_test}),
83
---> 84 epochs=nb_epoch, batch_size=batch_size, initial_epoch=initial_epoch, verbose=0, shuffle=True, callbacks=[se, tb, cm, mc, es, rs])
85
86 #model.save(save_version_dir,save_format='tf')
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
846 batch_size=batch_size):
847 callbacks.on_train_batch_begin(step)
--> 848 tmp_logs = train_function(iterator)
849 # Catch OutOfRangeError for Datasets of unknown size.
850 # This blocks until the batch has finished executing.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
578 xla_context.Exit()
579 else:
--> 580 result = self._call(*args, **kwds)
581
582 if tracing_count == self._get_tracing_count():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
625 # This is the first call of __call__, so we have to initialize.
626 initializers = []
--> 627 self._initialize(args, kwds, add_initializers_to=initializers)
628 finally:
629 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
504 self._concrete_stateful_fn = (
505 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 506 *args, **kwds))
507
508 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2444 args, kwargs = None, None
2445 with self._lock:
-> 2446 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2447 return graph_function
2448
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2775
2776 self._function_cache.missed.add(call_context_key)
-> 2777 graph_function = self._create_graph_function(args, kwargs)
2778 self._function_cache.primary[cache_key] = graph_function
2779 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2665 arg_names=arg_names,
2666 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2667 capture_by_value=self._capture_by_value),
2668 self._function_attributes,
2669 # Tell the ConcreteFunction to clean up its graph once it goes out of
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
979 _, original_func = tf_decorator.unwrap(python_func)
980
--> 981 func_outputs = python_func(*func_args, **func_kwargs)
982
983 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
439 # __wrapped__ allows AutoGraph to swap in a converted function. We give
440 # the function a weak reference to itself to avoid a reference cycle.
--> 441 return weak_wrapped_fn().__wrapped__(*args, **kwds)
442 weak_wrapped_fn = weakref.ref(wrapped_fn)
443
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
TypeError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:533 train_step **
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:205 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
TypeError: __call__() got an unexpected keyword argument 'sample_weight'
This problem is resolved when I replace the __call__() magic method with call() and implement some of the underlying logic manually. This works, with the same usage. The __call__ method is changed to:
def call(self, y_true, y_pred):
    return super().call(y_true, y_pred) * get_sample_weights(y_true, y_pred, self.cost_mat)
i.e. we calculate the categorical cross-entropy loss on y_true and y_pred and then multiply it by our weight matrix directly, rather than passing y_true, y_pred and self.cost_mat to the categorical cross-entropy __call__ method and using the inherited method's own logic for multiplying the loss by the weights. This isn't a massive problem, as the code does work, but I can't figure out why I was unable to use the inherited class's own __call__ implementation properly (as per the original code).
Also, I changed y_pred.shape[1].assert_is_compatible_with(num_classes) to assert(y_pred.shape[1] == num_classes) because y_pred.shape[1] was returning an int. I have no idea why: inspecting y_pred, it is, of course, a tf.Tensor, so .shape[1] should return a tf.TensorShape object on which .assert_is_compatible_with() could be called.
This is the whole class implementation that I've used successfully.
Note: it includes from_config and get_config methods, alongside an explicit assignment to the Keras loss namespace (last line) to enable whole-model + optimizer state saving through model.save(save_format='tf'). Some of this functionality was challenging to get working: I had to implement an explicit cast to a NumPy array (see the first line of the __init__ method).
class WeightedCategoricalCrossentropy(tensorflow.keras.losses.CategoricalCrossentropy):

    def __init__(self, cost_mat, name='weighted_categorical_crossentropy', **kwargs):
        cost_mat = np.array(cost_mat)
        ## When loading from config, self.cost_mat comes back as a list rather than a NumPy array.
        ## The line above fixes this, so that .ndim can be called successfully.
        ## However, this is probably not the best implementation.
        assert(cost_mat.ndim == 2)
        assert(cost_mat.shape[0] == cost_mat.shape[1])
        super().__init__(name=name, **kwargs)
        self.cost_mat = K.cast_to_floatx(cost_mat)

    def call(self, y_true, y_pred):
        return super().call(y_true, y_pred) * get_sample_weights(y_true, y_pred, self.cost_mat)

    def get_config(self):
        config = super().get_config().copy()
        # Calling .update on the line above, during assignment, causes an error with config becoming None-type.
        config.update({'cost_mat': (self.cost_mat)})
        return config

    @classmethod
    def from_config(cls, config):
        # Something goes wrong here and changes self.cost_mat to a list variable.
        # See above for the temporary fix.
        return cls(**config)


def get_sample_weights(y_true, y_pred, cost_m):
    num_classes = len(cost_m)
    y_pred.shape.assert_has_rank(2)
    assert(y_pred.shape[1] == num_classes)
    y_pred.shape.assert_is_compatible_with(y_true.shape)
    y_pred = K.one_hot(K.argmax(y_pred), num_classes)
    y_true_nk1 = K.expand_dims(y_true, 2)
    y_pred_n1k = K.expand_dims(y_pred, 1)
    cost_m_1kk = K.expand_dims(cost_m, 0)
    sample_weights_nkk = cost_m_1kk * y_true_nk1 * y_pred_n1k
    sample_weights_n = K.sum(sample_weights_nkk, axis=[1, 2])
    return sample_weights_n


tf.keras.losses.WeightedCategoricalCrossentropy = WeightedCategoricalCrossentropy
Finally, saving the model is implemented like so:
model.save(save_version_dir,save_format='tf')
and loading the model as follows:
model = tf.keras.models.load_model(
    save_version_dir,
    compile=True,
    custom_objects={
        'WeightedCategoricalCrossentropy': WeightedCategoricalCrossentropy(cost_matrix)
    }
)
As per the comments, the issue here is that TensorFlow now enforces that the override keeps the original method's signature.
The following has been tested on a toy problem (by comparing equal weighting in the cost_matrix against weighting all but a single category to nothing) and works:
class WeightedCategoricalCrossentropy(tf.keras.losses.CategoricalCrossentropy):

    def __init__(self, cost_mat, name='weighted_categorical_crossentropy', **kwargs):
        cost_mat = np.array(cost_mat)
        ## When loading from config, self.cost_mat comes back as a list rather than a NumPy array.
        ## The line above fixes this, so that .ndim can be called successfully.
        ## However, this is probably not the best implementation.
        assert(cost_mat.ndim == 2)
        assert(cost_mat.shape[0] == cost_mat.shape[1])
        super().__init__(name=name, **kwargs)
        self.cost_mat = K.cast_to_floatx(cost_mat)

    def __call__(self, y_true, y_pred, sample_weight=None):
        assert sample_weight is None, "should only be derived from the cost matrix"
        return super().__call__(
            y_true=y_true,
            y_pred=y_pred,
            sample_weight=get_sample_weights(y_true, y_pred, self.cost_mat),
        )

    def get_config(self):
        config = super().get_config().copy()
        # Calling .update on the line above, during assignment, causes an error with config becoming None-type.
        config.update({'cost_mat': (self.cost_mat)})
        return config

    @classmethod
    def from_config(cls, config):
        # Something goes wrong here and changes self.cost_mat to a list variable.
        # See above for the temporary fix.
        return cls(**config)


def get_sample_weights(y_true, y_pred, cost_m):
    num_classes = len(cost_m)
    y_pred.shape.assert_has_rank(2)
    assert(y_pred.shape[1] == num_classes)
    y_pred.shape.assert_is_compatible_with(y_true.shape)
    y_pred = K.one_hot(K.argmax(y_pred), num_classes)
    y_true_nk1 = K.expand_dims(y_true, 2)
    y_pred_n1k = K.expand_dims(y_pred, 1)
    cost_m_1kk = K.expand_dims(cost_m, 0)
    sample_weights_nkk = cost_m_1kk * y_true_nk1 * y_pred_n1k
    sample_weights_n = K.sum(sample_weights_nkk, axis=[1, 2])
    return sample_weights_n


# Register the loss in the Keras namespace to enable loading of the custom object.
tf.keras.losses.WeightedCategoricalCrossentropy = WeightedCategoricalCrossentropy
Usage
Where cost_matrix is a 2D NumPy array, e.g.:
[
  [ Weight Category 1 predicted as Category 1,
    Weight Category 1 predicted as Category 2,
    Weight Category 1 predicted as Category 3 ]
  [ Weight Category 2 predicted as Category 1,
    ...,
    ... ]
  [ ...,
    ...,
    Weight Category 3 predicted as Category 3 ]
]
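For instance, a hypothetical 3-class cost_matrix (the numbers below are purely illustrative) could be built like this:
import numpy as np

# Rows: true category, columns: predicted category; the values are made up.
cost_matrix = np.array([
    [1.0, 1.0, 2.0],   # true category 1
    [1.0, 1.0, 1.0],   # true category 2
    [2.0, 1.0, 1.0],   # true category 3
])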
model.compile(
    optimizer='adam',
    loss=WeightedCategoricalCrossentropy(cost_matrix)
)
Model Saving
model.save(save_version_dir,save_format='tf')
Model Loading
model = tf.keras.models.load_model(
    save_version_dir,
    compile=True,
    custom_objects={
        'WeightedCategoricalCrossentropy': WeightedCategoricalCrossentropy(cost_matrix)
    }
)

Error when checking input: expected dense_203_input to have shape (1202,) but got array with shape (1,)

I've made a very simple neural network, which is meant to do reinforcement learning. However, I cannot predict anything, because I get an error when calling predict.
Error in question:
Error when checking input: expected dense_203_input to have shape (1202,) but got array with shape (1,)
Model in question:
def _build_compile_model(self):
    model = Sequential()
    model.add(Dense(300, activation='relu', input_dim=1202))
    model.add(Dense(300, activation='relu'))
    model.add(Dense(200, activation='relu'))
    model.add(Dense(self._action_size, activation='softmax'))
    model.compile(loss='mse', optimizer=self._optimizer)
    return model
The error occurs when calling model.predict(state), where state is an array of shape (1202, 1).
The full error message is:
ValueError Traceback (most recent call last)
<ipython-input-148-06b7a01facef> in <module>
18 new_state, reward = env.step(action, new_demand_a, new_demand_b) # Take action, get new state and reward
19 new_state = np.reshape(new_state, [1202, -1])
---> 20 agent.update(old_state, new_state, action, reward) # Let the agent update internal
21 average_reward.append(reward) # Keep score
22 if i % 100 == 0 and i != 0: # Print out metadata every 100th iteration
<ipython-input-145-142ae54ce43f> in update(self, old_state, new_state, action, reward)
49 def update(self, old_state, new_state, action, reward):
50 print(old_state.shape)
---> 51 target = self.q_network.predict(old_state)
52 t = self.target_network.predict(new_state)
53 target[0][action] = reward + self.gamma * np.amax(t)
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in predict(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)
1011 max_queue_size=max_queue_size,
1012 workers=workers,
-> 1013 use_multiprocessing=use_multiprocessing)
1014
1015 def reset_metrics(self):
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in predict(self, model, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing, **kwargs)
496 model, ModeKeys.PREDICT, x=x, batch_size=batch_size, verbose=verbose,
497 steps=steps, callbacks=callbacks, max_queue_size=max_queue_size,
--> 498 workers=workers, use_multiprocessing=use_multiprocessing, **kwargs)
499
500
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _model_iteration(self, model, mode, x, y, batch_size, verbose, sample_weight, steps, callbacks, max_queue_size, workers, use_multiprocessing, **kwargs)
424 max_queue_size=max_queue_size,
425 workers=workers,
--> 426 use_multiprocessing=use_multiprocessing)
427 total_samples = _get_total_number_of_samples(adapter)
428 use_sample = total_samples is not None
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
644 standardize_function = None
645 x, y, sample_weights = standardize(
--> 646 x, y, sample_weight=sample_weights)
647 elif adapter_cls is data_adapter.ListsOfScalarsDataAdapter:
648 standardize_function = standardize
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2381 is_dataset=is_dataset,
2382 class_weight=class_weight,
-> 2383 batch_size=batch_size)
2384
2385 def _standardize_tensors(self, x, y, sample_weight, run_eagerly, dict_inputs,
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _standardize_tensors(self, x, y, sample_weight, run_eagerly, dict_inputs, is_dataset, class_weight, batch_size)
2408 feed_input_shapes,
2409 check_batch_axis=False, # Don't enforce the batch size.
-> 2410 exception_prefix='input')
2411
2412 # Get typespecs for the input data and sanitize it if necessary.
/opt/conda/envs/tensorflow2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
580 ': expected ' + names[i] + ' to have shape ' +
581 str(shape) + ' but got array with shape ' +
--> 582 str(data_shape))
583 return data
584
ValueError: Error when checking input: expected dense_211_input to have shape (1202,) but got array with shape (1,)
There are two approaches when feeding inputs to your model:
1st Option: Using the input_shape
model.add(Dense(300, activation='relu', input_shape=(1202,1)))
Here the input shape is in 2D, but you should feed your network a 3D input (Rank 3) since you need to include the batch_size.
Example input:
state = np.array(np.ones((BATCH_SIZE,1202,1)))
print("Input Rank: {}".format(tf.rank(state))) # Check for the Rank of Input
2nd Option: Using the input_dim
model_dim.add(Dense(300, activation='relu', input_dim=1202))
Here the input shape is in 1D, but you should feed your network a 2D input (Rank 2) since you need to include the batch_size.
Example input :
state = np.array(np.ones((1,1202,)))
print("Input Rank: {}".format(tf.rank(state))) # Check for the Rank of Input

Keras: Using weights for NCE loss

So here is the model with the standard loss function.
target = Input(shape=(1, ), dtype='int32')
w_inputs = Input(shape=(1, ), dtype='int32')
w_emb = Embedding(V, dim, embeddings_initializer='glorot_uniform', name='word_emb')(w_inputs)
w_flat = Flatten()(w_emb)

# context
w1 = Dense(input_dim=dim, units=V, activation='softmax')  # because I want to use prediction on the valid set
w = w1(w_flat)

model = Model(inputs=[w_inputs], outputs=[w])
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
It works fine. Since NCE loss isn't available in Keras, I wrote up a custom loss.
def model_loss(layer, labels, inputs, num_sampled, num_classes, num_true):
    weights = K.transpose(layer.get_weights()[0])
    biases = layer.get_weights()[1]

    def loss(y_true, y_pred):
        if K.learning_phase() == 1:
            compute_loss = tf.nn.nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true,
                                          partition_strategy="div")
        else:
            logits = tf.matmul(K.squeeze(inputs, axis=0), K.transpose(weights))
            logits = tf.nn.bias_add(logits, biases)
            labels_one_hot = tf.one_hot(labels, num_classes)
            loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels_one_hot[:][0][:],
                logits=logits)
            compute_loss = tf.reduce_sum(loss, axis=1)
        return compute_loss

    return loss
And I changed the last line to:
model.compile(loss=model_loss(w1, target, w_emb, num_sampled, num_classes, num_true), optimizer='sgd', metrics=['accuracy'])
This compiles, by the way.
But on execution it dies:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-68-d3b3ef93b81b> in <module>
3 epochs=epochs, steps_per_epoch = seq_len,
4
----> 5 verbose=1, max_queue_size=15)
/opt/conda/lib/python3.6/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/opt/conda/lib/python3.6/site-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1416 use_multiprocessing=use_multiprocessing,
1417 shuffle=shuffle,
-> 1418 initial_epoch=initial_epoch)
1419
1420 #interfaces.legacy_generator_methods_support
/opt/conda/lib/python3.6/site-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
38
39 do_validation = bool(validation_data)
---> 40 model._make_train_function()
41 if do_validation:
42 model._make_test_function()
/opt/conda/lib/python3.6/site-packages/keras/engine/training.py in _make_train_function(self)
507 training_updates = self.optimizer.get_updates(
508 params=self._collected_trainable_weights,
--> 509 loss=self.total_loss)
510 updates = (self.updates +
511 training_updates +
/opt/conda/lib/python3.6/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/opt/conda/lib/python3.6/site-packages/keras/optimizers.py in get_updates(self, loss, params)
182 #interfaces.legacy_get_updates_support
183 def get_updates(self, loss, params):
--> 184 grads = self.get_gradients(loss, params)
185 self.updates = [K.update_add(self.iterations, 1)]
186
/opt/conda/lib/python3.6/site-packages/keras/optimizers.py in get_gradients(self, loss, params)
89 grads = K.gradients(loss, params)
90 if None in grads:
---> 91 raise ValueError('An operation has `None` for gradient. '
92 'Please make sure that all of your ops have a '
93 'gradient defined (i.e. are differentiable). '
ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
The issue, of course, is that the weights aren't actually getting updated in the layer, hence the missing gradient. How could I do that without making a custom layer? I tried that approach, but I gave up on measuring things like val_acc when using a layer.
It seems like you cannot do it in Keras without the Layer API. You can try this solution using a custom layer: Keras NCE Implementation
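The linked implementation isn't reproduced here, but a minimal sketch of the custom-layer idea (a layer that owns the output weights and registers tf.nn.nce_loss through add_loss, so those weights receive gradients) could look like the following. The layer name, shapes and wiring are assumptions for illustration, not the linked code:
import tensorflow as tf
from tensorflow.keras import layers

class NCELossLayer(layers.Layer):
    """Sketch: owns the output projection and adds an NCE loss via add_loss."""

    def __init__(self, num_classes, num_sampled, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.num_sampled = num_sampled

    def build(self, input_shape):
        emb_shape, _ = input_shape  # [(batch, dim), (batch, 1)]
        dim = int(emb_shape[-1])
        # Keeping the weights inside the layer makes them trainable Keras variables.
        self.nce_weights = self.add_weight(
            name='nce_weights', shape=(self.num_classes, dim),
            initializer='glorot_uniform', trainable=True)
        self.nce_biases = self.add_weight(
            name='nce_biases', shape=(self.num_classes,),
            initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        embeddings, labels = inputs  # embeddings: (batch, dim), labels: (batch, 1)
        loss = tf.reduce_mean(tf.nn.nce_loss(
            weights=self.nce_weights,
            biases=self.nce_biases,
            labels=tf.cast(labels, tf.int64),
            inputs=embeddings,
            num_sampled=self.num_sampled,
            num_classes=self.num_classes))
        self.add_loss(loss)
        return embeddings  # pass-through so the layer fits inside a model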

Custom model in Keras fails to fit on the first run

I have a custom model in Keras. It's essentially an RNN model which on every step computes a negative-sampling loss, i.e. a sigmoid over positive and negative logits.
from tensorflow.keras.layers import Input, Embedding, GRU, dot, Dense, add, concatenate, multiply, Subtract

class rnn_neg_sampling(keras.models.Model):
    def __init__(self, vocab_size, dim, seq_length, num_negatives=10):
        super(rnn_neg_sampling, self).__init__()
        self.num_negatives = num_negatives
        self.embedding_input = Embedding(
            input_dim=vocab_size+1,
            output_dim=dim,
            mask_zero=True,
            input_length=seq_length
        )
        self.embedding_output = Embedding(
            input_dim=vocab_size+1,
            output_dim=dim,
            mask_zero=True,
            input_length=seq_length
        )
        self.gru = GRU(dim, return_sequences=True)

    def call(self, input):
        inputs, targets, negatives = input
        embedded_inputs = self.embedding_input(inputs)
        # https://keras.io/layers/recurrent/#gru
        gru_output = self.gru(embedded_inputs)
        embedded_targets = self.embedding_output(targets)
        embedded_negatives = self.embedding_output(negatives)
        positive_dots = K.expand_dims(multiply([gru_output, embedded_targets]), axis=1)
        negative_dots = multiply([K.repeat_elements(K.expand_dims(gru_output, axis=1),
                                                    rep=self.num_negatives,
                                                    axis=1),
                                  embedded_negatives])
        positive_logits = K.sum(positive_dots, axis=-1)
        negative_logits = -K.sum(negative_dots, axis=-1)
        logits = K.concatenate([positive_logits, negative_logits], axis=1)
        self.add_loss(-K.mean(K.log(K.sigmoid(logits))))
I create the model like this:
neg_number = 3
vector_dim=10
sequence_length = 5
vocab_size = 100
ns_model = rnn_neg_sampling(vocab_size, vector_dim, sequence_length, neg_number)
The data is created like this:
X_inputs = np.random.randint(0, 5, size=(batch_size, sequence_length))
X_targets = np.random.randint(0, 5, size=(batch_size, sequence_length))
X_negatives = np.random.randint(0, 5, size=(batch_size, neg_number, sequence_length))
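For reference, with the settings above (sequence_length = 5, neg_number = 3) and assuming batch_size = 3 (suggested by the "3/3" in the training log further down), these arrays have the following shapes:
batch_size = 3  # assumption, inferred from the "3/3" in the later log
print(X_inputs.shape)     # (3, 5)
print(X_targets.shape)    # (3, 5)
print(X_negatives.shape)  # (3, 3, 5)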
And I compile and fit it like this:
ns_model.compile(loss=None, optimizer='rmsprop')
history = ns_model.fit([X_inputs, X_targets, X_negatives], epochs=1)
However, it fails with the following log:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-1343-a8447c14a3a4> in <module>
1 ns_model.compile(loss=None, optimizer='rmsprop')
----> 2 history = ns_model.fit([X_inputs, X_targets, X_negatives], epochs=1)
~/miniconda3/envs/ml.crash-course/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
966 validation_steps=validation_steps,
967 validation_freq=validation_freq,
--> 968 steps_name='steps_per_epoch')
969
970 def evaluate(self,
~/miniconda3/envs/ml.crash-course/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
147
148 # Get step function and loop type.
--> 149 f = _make_execution_function(model, mode)
150 use_steps = is_dataset or steps_per_epoch is not None
151 do_validation = val_inputs is not None
~/miniconda3/envs/ml.crash-course/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py in _make_execution_function(model, mode)
490 if model._distribution_strategy:
491 return distributed_training_utils._make_execution_function(model, mode)
--> 492 return model._make_execution_function(mode)
493
494
~/miniconda3/envs/ml.crash-course/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py in _make_execution_function(self, mode)
2156 def _make_execution_function(self, mode):
2157 if mode == ModeKeys.TRAIN:
-> 2158 self._make_fit_function()
2159 return self._fit_function
2160 if mode == ModeKeys.TEST:
~/miniconda3/envs/ml.crash-course/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py in _make_fit_function(self)
2099 ]
2100 self._make_train_function_helper(
-> 2101 '_fit_function', [self.total_loss] + metrics_tensors)
2102
2103 def _make_test_function_helper(self, fn_name, outputs, metric_updates=None):
AttributeError: 'rnn_neg_sampling' object has no attribute 'total_loss'
What is strange is that if I run the same snippet again (just launching this snippet in IPython a second time), I get a fitted model:
ns_model.compile(loss=None, optimizer='rmsprop')
history = ns_model.fit([X_inputs, X_targets, X_negatives], epochs=1)
>>>>
W0208 15:14:44.158487 139940057085760 training.py:304] Output "output_1" missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to "output_1".
3/3 [==============================] - 6s 2s/sample - loss: 0.6930
How can I make the model fit on the first run?

Keras Layer Concatenation

I'm trying to see how I can create a model in Keras with multiple Embedding layers and other inputs. Here's how my model is structured (E = Embedding layer, [....] = Input layer):
E    E    [V V V]
 \   |   /
  \  |  /
    Dense
      |
    Dense
Here is my code so far:
model_a = Sequential()
model_a.add(Embedding(...))

model_b = Sequential()
model_b.add(Embedding(...))

model_c = Sequential()
model_c.add(Embedding(...))

model_values = Sequential()
model_values.add(Input(...))

classification_model = Sequential()
classification_layers = [
    Concatenate([model_a, model_b, model_c, model_values]),
    Dense(...),
    Dense(...),
    Dense(2, activation='softmax')
]
for layer in classification_layers:
    classification_model.add(layer)

classification_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
classification_model.fit(train_data, one_hot_labels, epochs=1, validation_split=0.2)
However I get the following error:
ValueError: A `Concatenate` layer should be called on a list of at least 2 inputs
I am at a loss as to what I'm doing wrong here. Here's a little more detail from the error log:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-37-d5ab23b17e9d> in <module>()
----> 1 classification_model.fit(train_data,one_hot_labels, epochs=1, validation_split=0.2)
/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
953 sample_weight=sample_weight,
954 class_weight=class_weight,
--> 955 batch_size=batch_size)
956 # Prepare validation data.
957 do_validation = False
/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
674 # to match the value shapes.
675 if not self.inputs:
--> 676 self._set_inputs(x)
677
678 if y is not None:
/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in _set_inputs(self, inputs, outputs, training)
574 assert len(inputs) == 1
575 inputs = inputs[0]
--> 576 self.build(input_shape=(None,) + inputs.shape[1:])
577 return
578
/usr/local/lib/python3.5/dist-packages/keras/engine/sequential.py in build(self, input_shape)
225 self.inputs = [x]
226 for layer in self._layers:
--> 227 x = layer(x)
228 self.outputs = [x]
229
/usr/local/lib/python3.5/dist-packages/keras/engine/base_layer.py in __call__(self, inputs, **kwargs)
430 '`layer.build(batch_input_shape)`')
431 if len(input_shapes) == 1:
--> 432 self.build(input_shapes[0])
433 else:
434 self.build(input_shapes)
/usr/local/lib/python3.5/dist-packages/keras/layers/merge.py in build(self, input_shape)
339 # Used purely for shape validation.
340 if not isinstance(input_shape, list) or len(input_shape) < 2:
--> 341 raise ValueError('A `Concatenate` layer should be called '
342 'on a list of at least 2 inputs')
343 if all([shape is None for shape in input_shape]):
ValueError: A `Concatenate` layer should be called on a list of at least 2 inputs
input1 = Input(shape=...)
input2 = Input(...)
input3 = Input(...)
values = Input(...)

out1 = Embedding(...)(input1)
out2 = Embedding(...)(input2)
out3 = Embedding(...)(input3)

# Make sure values has a shape compatible with the embedding outputs.
# Usually it should have shape (equal_samples, equal_length, features).
joinedInput = Concatenate()([out1, out2, out3, values])

out = Dense(...)(joinedInput)
out = Dense(...)(out)
out = Dense(2, activation='softmax')(out)

model = Model([input1, input2, input3, values], out)
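With the functional model above, compile and fit then take one array per input. A hedged usage example, where train_a, train_b, train_c and train_values are placeholder names for the four input arrays (they are not defined in the question):
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# train_a, train_b, train_c, train_values are assumed placeholders for the four inputs.
model.fit([train_a, train_b, train_c, train_values],
          one_hot_labels,
          epochs=1,
          validation_split=0.2)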
You are missing the 'axis' parameter in the call. If you are concatenating on the last dimension (it's unclear what the dimensions of these embeddings and input tensors are), use:
concatenate([model_a,model_b,model_c, model_values], axis=-1)