Is there a bug in save_weights/load_weights with a custom layer in TensorFlow 1.13.1?

This took me more than a day and left me quite frustrated. I suspect it may be a bug in TensorFlow 1.13.1 (a stable release).
In summary, I created a custom model in the Model subclassing style that contains only one custom layer. After initialization, I dumped its trainable weights to a file and restored them using the save_weights and load_weights functions. The trainable weights before saving and after loading were different.
I also ran the same test on TensorFlow 2.0.0a0, and that version did not show this behavior.
My custom layer:
import tensorflow as tf

class EncodingLayer(tf.keras.layers.Layer):
    def __init__(self, out_size):
        super().__init__()
        self.rnn_layer = tf.keras.layers.GRU(out_size, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')

    def call(self, X, **kwargs):
        output, state = self.rnn_layer(X)
        return output, state
This is the main part:
class EncodingModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.encoder_layer = EncodingLayer(out_size=1)

    def infer(self, inputs):
        output, state = self.encoder_layer(inputs)
        return output


if __name__ == '__main__':
    # Comment line below for running in TF 2.0
    tf.enable_eager_execution()

    # shape == (2, 3, 2)
    inputs = tf.convert_to_tensor([
        [[1., 2.], [2., 3.], [4., 4.]],
        [[1., 2.], [2., 3.], [4., 4.]],
    ])
    model = EncodingModel()
    # Just for building the graph
    model.infer(inputs)
    print('Before saving model: ', model.trainable_weights[0].numpy().mean())
    model.save_weights('weight')

    new_model = EncodingModel()
    new_model.infer(inputs)
    new_model.load_weights('weight')
    print('Loaded model: ', new_model.trainable_weights[0].numpy().mean())
The result when running in TF 1.13.1:
Before saving model: 0.28864467
Loaded model: 0.117300846
The result when running in TF 2.0.0a0:
Before saving model: -0.06922924
Loaded model: -0.06922924
Although the result suggests that it might be a bug, I am not so sure. Since the code is very basic, a bug like that should have been discovered easily. I did a lot of searching but found no one mentioning it, so I'd guess there's something I misunderstood :)
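One way to narrow this down, as a debugging sketch rather than an answer (it assumes save_weights wrote a TF-format checkpoint, which should be the case when the filename has no .h5 suffix), is to list what actually landed in the checkpoint and compare it against model.trainable_weights:

# Debugging sketch (not part of the original code): inspect the checkpoint contents.
# If the GRU kernel/bias show up here with sensible shapes, saving worked and the
# mismatch happens on the load/restore side.
for name, shape in tf.train.list_variables('weight'):
    print(name, shape)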


Keras layer channel-wise multiplication of scalar and graph plotting

I am trying to multiply scalar values into each channel of a tensor:
import tensorflow as tf
t = tf.ones([2,3,3,4])
w = tf.constant([1,2,3,4], dtype=tf.float32)
tf.multiply(t,w)
yields
<tf.Tensor: shape=(2, 3, 3, 4), dtype=float32, numpy=
array([[[[1., 2., 3., 4.],
[1., 2., 3., 4.],
[1., 2., 3., 4.]],
...
which is correct.
Now I am trying to wrap that operation inside a keras.layers.Layer, where w is a learnable parameter. I also want to plot my model using tf.keras.utils.plot_model(m). I run into several problems.
Method 1
from tensorflow.keras import Model, Input

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.w = self.add_weight(shape=(256,), trainable=True)

    def call(self, x):
        return x * self.w
I plot this model using
mm = MyModel()
x = Input(shape=(64, 64, 256), batch_size=10, name='Input')
m = Model(inputs=[x], outputs=mm.call(x))
tf.keras.utils.plot_model(m)
Problem: I encountered the following warning:
WARNING:tensorflow:
The following Variables were used a Lambda layer's call (tf.math.multiply_2), but
are not present in its tracked objects:
<tf.Variable 'Variable:0' shape=(256,) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
Question: Can I safely ignore the warning, and are the weights still learned? If so, how can I suppress this warning?
Method 2
As suggested in the warning, I wrap the multiplication in its own subclassed layer:
class MyMultiply(Layer):
    def __init__(self):
        super(MyMultiply, self).__init__()

    def call(self, x):
        return tf.multiply(x[0], x[1])

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.w = self.add_weight(shape=(256,), trainable=True)
        self.mul = MyMultiply()

    def call(self, x):
        return self.mul([x, self.w])
Problem: This works until the model is plotted. Then I encounter the following error: AttributeError: 'ResourceVariable' object has no attribute '_keras_history'
Traceback:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-18-e4cc5cc97726> in <module>()
21 x = Input(shape=(64, 64, 256), batch_size=10, name='Input')
22 m = Model(inputs=[x], outputs=mm.call(x))
---> 23 tf.keras.utils.plot_model(m)
---------------------------------------------------------------------------
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/node.py in <lambda>(t)
259 if self.is_input:
260 return []
--> 261 inbound_layers = nest.map_structure(lambda t: t._keras_history.layer,
262 self.call_args[0])
263 return inbound_layers
AttributeError: 'ResourceVariable' object has no attribute '_keras_history'
Question: How do I resolve that error? Is this a bug? (I submitted an issue to the TF GitHub repo, but it was deleted immediately.)
Method 3
I try to use keras.layers.Multiply instead:
from tensorflow.keras.layers import Multiply

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.w = self.add_weight(shape=(256,), trainable=True)
        self.mul = Multiply()

    def call(self, x):
        return self.mul([x, self.w])
Problem: ValueError: Can not merge tensors with different batch sizes. Got tensors with shapes : [(10, 64, 64, 256), (256,)]
To my understanding, the ValueError occurs because the internal _Merge layer checks for equal batch sizes. The internal Multiply layer, however, implements the multiplication with broadcasting (which should work!):
# from tensorflow/python/keras/layers/merge.py line 316-320
def _merge_function(self, inputs):
    output = inputs[0]
    for i in range(1, len(inputs)):
        output = output * inputs[i]
    return output
I could use tf.broadcast_to and so on; however, to my understanding this would materialize the tensor and occupy more memory, which I am trying to avoid.
Question: Is there another way to make keras.layers.Multiply work, so ultimately the model plotting works?
You can avoid the warning in Method 1 by creating a Keras Layer instead of a Model.
import tensorflow as tf

class MyLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(MyLayer, self).__init__()
        self.w = self.add_weight(name='multiply_weight', shape=(256,), trainable=True)

    def call(self, x):
        return tf.multiply(x, self.w)

mul_layer = MyLayer()
x = tf.keras.Input(shape=(64, 64, 256), batch_size=10, name='Input')
output = mul_layer(x)
m = tf.keras.Model(inputs=[x], outputs=output)
tf.keras.utils.plot_model(m)
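A quick follow-up check (a sketch, not from the original answer): because the weight is created with add_weight inside a proper Layer, it is tracked by the model, so it should show up in m.trainable_weights and in the parameter count reported by m.summary().

print([w.name for w in m.trainable_weights])  # expect the 'multiply_weight' variable here
m.summary()                                   # MyLayer should contribute 256 trainable parameters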

How does TensorFlow or Keras handle model weight initialization, and when does it happen?

After reading the answer to this question I am a bit confused as to when exactly TensorFlow initializes the weight and bias variables.
As per the answers, compile() defines the loss function, the optimizer and the metrics. That's all.
Since the compile() method doesn't initialize them, that would suggest it happens during the fit() run.
However, the issue with that is: when loading models or loading weights, how would fit() know that the weights it is presented with are actually useful and should not be thrown away and replaced with random values?
We pass the type of initializer via the kernel_initializer argument when declaring the layer. For example:
dense02 = tf.keras.layers.Dense(units=10,
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros')
So an obvious question is whether the weights are initialized layer by layer during the first forward pass of the first epoch, or whether it happens for all layers before the first epoch.
(In other words, if there are, say, 5 Dense layers in the model, does initialization happen one layer at a time, i.e. the first Dense layer is initialized, then the forward pass happens for that layer, then the second layer is initialized and its forward pass happens, and so on?)
Another aspect concerns transfer learning: when stacking custom layers on top of a trained model, the trained model's layers already have their weights, while the layers I added don't have any useful weights yet. So how does TensorFlow know to initialize only the variables of the layers I added and not mess up the layers of the transferred model (given that I don't set trainable=False)?
How does TensorFlow or Keras handle weight initialization?
The weights are initialized when the model is created (when each layer in the model is initialized), i.e. before compile() and fit():
import tensorflow as tf
from tensorflow.keras import models, layers

inputs = layers.Input((3, ))
outputs = layers.Dense(units=10,
                       kernel_initializer='glorot_uniform',
                       bias_initializer='zeros')(inputs)
model = models.Model(inputs=inputs, outputs=outputs)

for layer in model.layers:
    print("Config:\n{}\nWeights:\n{}\n".format(layer.get_config(), layer.get_weights()))
Outputs:
Config:
{'batch_input_shape': (None, 3), 'dtype': 'float32', 'sparse': False, 'ragged': False, 'name': 'input_1'}
Weights:
[]
Config:
{'name': 'dense', 'trainable': True, 'dtype': 'float32', 'units': 10, 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'class_name': 'GlorotUniform', 'config': {'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}
Weights:
[array([[-0.60352975, 0.08275259, -0.6521113 , -0.5860774 , -0.42276743,
-0.3142944 , -0.28118378, 0.07770532, -0.5644444 , -0.47069687],
[ 0.4611913 , 0.35170448, -0.62191975, 0.5837332 , -0.3390234 ,
-0.4033073 , 0.03493106, -0.06078851, -0.53159714, 0.49872506],
[ 0.43685734, 0.6160207 , 0.01610583, -0.3673877 , -0.14144647,
-0.3792309 , 0.05478126, 0.602067 , -0.47438127, 0.36463356]],
dtype=float32), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)]
After doing a bit more research: even though Mr. For Example's answer is correct, let's dig a bit deeper into how initialization works in TensorFlow Keras.
As per the tf.keras.layers.Layer Doc, we can create variables in the following two methods:
__init__(self, ...): Defines custom layer attributes, and creates layer state variables that do not depend on input shapes, using add_weight()
build(self, input_shape): This method can be used to create weights that depend on the shape(s) of the input(s), using add_weight()
The below code shows an example of a basic layer with 2 variables that does the computation: y = w . x + b:
import tensorflow as tf
from tensorflow.keras.layers import Layer

class SimpleDense(Layer):

    def __init__(self, units=32):
        super(SimpleDense, self).__init__()
        self.units = units

    def build(self, input_shape):  # Create the state of the layer (weights)
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_shape[-1], self.units),
                                 dtype='float32'),
            trainable=True)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(self.units,), dtype='float32'),
            trainable=True)

    def call(self, inputs):  # Defines the computation from inputs to outputs
        return tf.matmul(inputs, self.w) + self.b

# Instantiates the layer.
linear_layer = SimpleDense(4)

# This will also call `build(input_shape)` and create the weights.
y = linear_layer(tf.ones((2, 2)))
assert len(linear_layer.weights) == 2

# These weights are trainable, so they're listed in `trainable_weights`:
assert len(linear_layer.trainable_weights) == 2
The most interesting thing to note in the above code is when the build method is called.
build() is called when the layer (after it has been initialized) is given some sort of input, whether actual values or just a TensorFlow placeholder.
When using a Keras Sequential model and we add a layer to it, the input placeholder is automatically assigned to the layer, thereby initializing it at the same time.
Thus we see the weights before compile() or fit() is called on the Keras Model. (Note that __call__() will automatically build the layer, if it has not been built yet, by calling build().)
Regarding transfer learning: when we load the transferred model, we are loading layers that are already built, so the build method is not called again when you add those layers to your own model.
In other words, the layers of the transferred model already had the input placeholder assigned to them, and build() was already called when the transferred model was trained.
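A small illustrative sketch of this bookkeeping (not from the original answer; MobileNetV2 is just an arbitrary example of a pre-built base): a freshly constructed layer reports built == False, the flag flips to True once the first call creates its weights, and the layers of an already-instantiated base model are built from the start, so stacking a new head on top only ever initializes the new weights.

import tensorflow as tf

head = tf.keras.layers.Dense(10)
print(head.built)                  # False: no weights created yet
head(tf.zeros((1, 1280)))          # first call triggers build() and weight creation
print(head.built)                  # True

base = tf.keras.applications.MobileNetV2(include_top=False, weights=None, pooling='avg')
print(all(layer.built for layer in base.layers))  # True: already built on construction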
Useful References:
Keras Layer Doc
TF Tutorial: Custom Layers

TensorFlow asks to run build even though it has already been run

As always, TensorFlow is behaving unintuitively on me, and I could use some help. I am able to run the checkpointing tutorial as given on the tutorial page (how much of a mess can saving a model be?), but as soon as I make a small modification here and a small modification there, the whole contraption comes crumbling down.
As you can clearly see, I am running the build method, but I am getting the error that I must run build with an input shape. In the tutorial there is no build method at all, and the one layer self.l1 is created in __init__ itself, which they themselves advise against in several other places.
class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        # self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        self.l1 = tf.keras.layers.Dense(5)
        self.dummy = tf.Variable(trainable=True, initial_value=tf.keras.initializers.glorot_normal()(shape=input_shape, dtype=tf.float32))
        print('built layers')

    def call(self, x):
        return self.l1(x)

net = Net()
net.build([1,])
net.save_weights('easy_checkpoint')
The output and traceback I am getting are:
built layers
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-31-3b54dc506ffd> in <module>
1 net = Net()
2 net.build([1,])
----> 3 net.save_weights('easy_checkpoint')
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in save_weights(self, filepath, overwrite, save_format)
1111 ValueError: For invalid/unknown format arguments.
1112 """
-> 1113 self._assert_weights_created()
1114 filepath_is_h5 = _is_hdf5_filepath(filepath)
1115 if save_format is None:
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in _assert_weights_created(self)
1560 'Weights are created when the Model is first called on '
1561 'inputs or `build()` is called with an `input_shape`.' %
-> 1562 self.name)
1563
1564 def _graph_network_add_loss(self, symbolic_loss):
ValueError: Weights for model net_10 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.
Edit: Here is my hunch: the problem with my code is that build does not execute the build of self.l1 but just creates it. Things do work out fine if I create self.l1 in __init__ and call super().build(input_shape) as the first line of Net's build. That much makes sense, but the code fails again if I replace super().build(input_shape) with self.l1.build(input_shape). Also, the code below shows that all the variables are actually there. So I am lost again. Any help is much appreciated.
tf.random.set_seed(42)

class Net1(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net1, self).__init__()
        self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        super().build(input_shape)
        self.dummy = tf.Variable(trainable=True, initial_value=tf.keras.initializers.glorot_normal()(shape=(1,), dtype=tf.float32))
        print(self.variables)

    def call(self, x):
        return self.l1(x)

net = Net1()
net.build((10, 1))
print('*' * 50)
print(net.variables)
output:
[<tf.Variable 'dense_56/kernel:0' shape=(1, 5) dtype=float32, numpy=
array([[ 0.3291242 , -0.11798644, -0.294235 , -0.07103491, -0.9326792 ]],
dtype=float32)>, <tf.Variable 'dense_56/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.09575049], dtype=float32)>]
**************************************************
[<tf.Variable 'dense_56/kernel:0' shape=(1, 5) dtype=float32, numpy=
array([[ 0.3291242 , -0.11798644, -0.294235 , -0.07103491, -0.9326792 ]],
dtype=float32)>, <tf.Variable 'dense_56/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.09575049], dtype=float32)>]
whereas,
tf.random.set_seed(42)

class Net1(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net1, self).__init__()
        self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        self.l1.build(input_shape)
        self.dummy = tf.Variable(trainable=True, initial_value=tf.keras.initializers.glorot_normal()(shape=(1,), dtype=tf.float32))
        print('variables', self.l1.variables, self.dummy)

    def call(self, x):
        return self.l1(x)

net = Net1()
net.build((10, 1))
print(net.variables)
output:
variables [<tf.Variable 'kernel:0' shape=(1, 5) dtype=float32, numpy=
array([[ 0.3291242 , -0.11798644, -0.294235 , -0.07103491, -0.9326792 ]],
dtype=float32)>, <tf.Variable 'bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>] <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.09575049], dtype=float32)>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-77-35561efcdc2f> in <module>
15 net = Net1()
16 net.build((10,1))
---> 17 print(net.variables)
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in variables(self)
1965 A list of variables.
1966 """
-> 1967 return self.weights
1968
1969   @property
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in weights(self)
498 A list of variables.
499 """
--> 500 return self._dedup_weights(self._undeduplicated_weights)
501
502   @property
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in _undeduplicated_weights(self)
503 def _undeduplicated_weights(self):
504 """Returns the undeduplicated list of all layer variables/weights."""
--> 505 self._assert_weights_created()
506 weights = []
507 for layer in self._layers:
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in _assert_weights_created(self)
1560 'Weights are created when the Model is first called on '
1561 'inputs or `build()` is called with an `input_shape`.' %
-> 1562 self.name)
1563
1564 def _graph_network_add_loss(self, symbolic_loss):
ValueError: Weights for model net1_40 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.
TL;DR: This is not a problem with the save_weights method. In order to build a subclassed model, you need to run it on real input. I only added two lines to the end of your code, as shown below.
#net.build(input_shape=[1,]) # don't need it. When you call the model with real input, `build` method will be executed
x_train = tf.random.normal(shape=(100,1),dtype=tf.float32)
output=net.predict(x_train)
Please check below for more details.
import tensorflow as tf

class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        # self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        self.l1 = tf.keras.layers.Dense(5)
        self.dummy = tf.Variable(trainable=True, initial_value=tf.keras.initializers.glorot_normal()(shape=(1,), dtype=tf.float32))
        print('built layers')

    def call(self, x):
        return self.l1(x)

net = Net()
#net.build(input_shape=[1,]) # don't need it. When you call the model with real input, `build` method will be executed
x_train = tf.random.normal(shape=(100, 1), dtype=tf.float32)
output = net.predict(x_train)
net.save_weights('easy_checkpoint')
A subclassed model is a piece of Python code (a call method). There is no graph of layers here. We cannot know how layers are connected to each other (because that's defined in the body of call, not as an explicit data structure), so we cannot infer input/output shapes. You can try printing model.summary() after instantiating the subclassed model; it will throw the same error you reported.
In contrast to subclassed models, you can do all these things (printing the summary, input/output shapes) in a Functional or Sequential model, because these models are static graphs of layers.
With that simple modification, your code works as expected: I can print the weights, shapes, etc., and save the weights as well.
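An equivalent alternative to predict, shown here only as a sketch: a single forward pass on a small dummy tensor also triggers build via __call__, after which save_weights succeeds.

net = Net()
_ = net(tf.zeros((1, 1)))           # __call__ runs build() and creates the weights
net.save_weights('easy_checkpoint')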

Keras TimeDistributed Not Masking CNN Model

For the sake of example, I have an input consisting of 2 images, of total shape (2,299,299,3). I'm trying to apply inceptionv3 on each image, and then subsequently process the output with an LSTM. I'm using a masking layer to exclude a blank image from being processed (specified below).
The code is:
import numpy as np
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, \
    Input, GlobalAveragePooling2D, Masking, TimeDistributed, LSTM, Dense, Flatten, Reshape, Lambda, Concatenate
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.applications import inception_v3

IMG_SIZE = (299, 299, 3)

def create_base():
    base_model = inception_v3.InceptionV3(weights='imagenet', include_top=False)
    x = GlobalAveragePooling2D()(base_model.output)
    base_model = Model(base_model.input, x)
    return base_model

base_model = create_base()

# Image mask to ignore images with pixel values of -2
IMAGE_MASK = -2 * np.expand_dims(np.ones(IMG_SIZE), 0)

final_input = Input((2, IMG_SIZE[0], IMG_SIZE[1], IMG_SIZE[2]))
final_model = Masking(mask_value=-2.)(final_input)
final_model = TimeDistributed(base_model)(final_model)
final_model = Lambda(lambda x: x, output_shape=lambda s: s)(final_model)
#final_model = Reshape(target_shape=(2, 2048))(final_model)
#final_model = Masking(mask_value = 0.)(final_model)
final_model = LSTM(5, return_sequences=False)(final_model)
final_model = Model(final_input, final_model)

# Create a sample test image
TEST_IMAGE = np.ones(IMG_SIZE)
# Create a test sample input, consisting of a normal image and a masked image
TEST_SAMPLE = np.concatenate((np.expand_dims(TEST_IMAGE, axis=0), IMAGE_MASK))

inp = final_model.input                                    # input placeholder
outputs = [layer.output for layer in final_model.layers]   # all layer outputs
functors = [K.function([inp] + [K.learning_phase()], [out]) for out in outputs]
layer_outs = [func([np.expand_dims(TEST_SAMPLE, 0), 1.]) for func in functors]
This does not work correctly. Specifically, the model should mask the IMAGE_MASK part of the input, but it instead processes it with Inception (giving a nonzero output). Here are the details:
layer_outs[-1], the LSTM output, is fine:
[array([[-0.15324114, -0.09620268, -0.01668587, 0.07938149, -0.00757846]], dtype=float32)]
layer_outs[-2] and layer_outs[-3], the LSTM input, are wrong; they should have all zeros in the second array:
[array([[[ 0.37713543, 0.36381325, 0.36197218, ..., 0.23298527,
0.43247852, 0.34844452],
[ 0.24972123, 0.2378867 , 0.11810347, ..., 0.51930511,
0.33289322, 0.33403745]]], dtype=float32)]
layer_outs[-4], the input to the CNN, is correctly masked:
[[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.],
...,
[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]]],
[[[-0., -0., -0.],
[-0., -0., -0.],
[-0., -0., -0.],
...,
[-0., -0., -0.],
[-0., -0., -0.],
[-0., -0., -0.]],
Note that the code seems to work correctly with a simpler base_model such as:
def create_base():
    input_layer = Input(IMG_SIZE)
    base_model = Flatten()(input_layer)
    base_model = Dense(2048)(base_model)
    base_model = Model(input_layer, base_model)
    return base_model
I have exhausted most online resources on this. Permutations of this question have been asked on Keras's github, such as here, here and here, but I can't seem to find any concrete resolution.
The links suggest that the issue stems from a combination of TimeDistributed being applied to BatchNormalization, and that the hacky fixes of either a Lambda identity layer or Reshape layers remove the errors but don't seem to produce the correct model.
I've tried to force the base model to support masking via:
base_model.__setattr__('supports_masking',True)
and I've also tried applying an identity layer via:
TimeDistributed(Lambda(lambda x: base_model(x), output_shape=lambda s:s))(final_model)
but none of these seem to work. Note that I would like the final model to be trainable, in particular the CNN part of it should remain trainable.
Not entirely sure this will work, but based on the comment made here, with a newer version of tensorflow + keras it should work:
final_model = TimeDistributed(Flatten())(final_input)
final_model = Masking(mask_value = -2.)(final_model)
final_model = TimeDistributed(Reshape(IMG_SIZE))(final_model)
final_model = TimeDistributed(base_model)(final_model)
final_model = Model(final_input,final_model)
I took a look at the source code of masking, and I noticed Keras creates a mask tensor that only reduces the last axis. As long as you're dealing with 5D tensors, it will cause no problem, but when you reduce the dimensions for the LSTM, this masking tensor becomes incompatible.
Doing the first flatten step, before masking, ensures that the masking tensor works properly for 3D tensors. Then you expand the image again to its original size.
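A small standalone sketch (toy shapes, not part of the original answer) that illustrates the claim about the mask reducing only the last axis: for a 5D image sequence the mask is 4D, while flattening first yields the (batch, time) mask the LSTM expects.

import tensorflow as tf

masking = tf.keras.layers.Masking(mask_value=-2.)
seq_5d = tf.ones((1, 2, 299, 299, 3))
print(masking.compute_mask(seq_5d).shape)           # (1, 2, 299, 299): one mask value per pixel
seq_3d = tf.reshape(seq_5d, (1, 2, 299 * 299 * 3))
print(masking.compute_mask(seq_3d).shape)           # (1, 2): one mask value per timestep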
I'll probably try to install newer versions soon to test it myself, but these installation procedures have caused too much trouble and I'm in the middle of something important here.
On my machine, this code compiles, but that strange error appears in prediction time (see link at the first line of this answer).
Creating a model for predicting the intermediate layers
I'm not sure, from the code I've seen, that the masking information is kept internally in the tensors. I don't know exactly how it works, but it seems to be managed separately from the building of the functions inside the layers.
So, try using a keras standard model to make the predictions:
inp = final_model.input # input placeholder
outputs = [layer.output for layer in final_model.layers] # all layer outputs
fullModel = Model(inp,outputs)
layerPredictions = fullModel.predict(np.expand_dims(TEST_SAMPLE,0))
print(layerPredictions[-2])
It seems to be working as intended. Masking in Keras doesn't produce zeros as you would expect; instead, it makes later layers, such as the LSTM and the loss calculation, skip the masked timesteps. In the case of RNNs, Keras (at least the TensorFlow backend) is implemented such that the states from the previous step are carried over (see tensorflow_backend.py). This is done in part to preserve the shapes of tensors when dynamic input is given.
If you really want zeros, you will have to implement your own layer with logic similar to Masking and return zeros explicitly. To solve your problem, you need a mask before the final LSTM layer that uses the final_input:
class MyMask(Masking):
    """Layer that adds a mask based on initial input."""

    def compute_mask(self, inputs, mask=None):
        # Might need to adjust shapes
        return K.any(K.not_equal(inputs[0], self.mask_value), axis=-1)

    def call(self, inputs):
        # We just return the input back
        return inputs[1]

    def compute_output_shape(self, input_shape):
        return input_shape[1]

final_model = MyMask(mask_value=-2.)([final_input, final_model])
You probably can attach the mask in a simpler manner but this custom class essentially adds a mask based on your initial inputs and outputs a Keras tensor that now has a mask.
In your example, the LSTM will ignore the second image. To confirm, you can set return_sequences=True and check that the outputs for the 2 images are identical.
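A toy verification sketch of that check (independent of the Inception pipeline, just a Masking layer feeding an LSTM): the masked second timestep simply repeats the LSTM output of the first one instead of producing zeros.

import numpy as np
from keras.models import Model
from keras.layers import Input, Masking, LSTM

inp = Input((2, 4))
x = Masking(mask_value=-2.)(inp)
out = LSTM(3, return_sequences=True)(x)
toy = Model(inp, out)

sample = np.ones((1, 2, 4), dtype='float32')
sample[0, 1, :] = -2.                       # second timestep is fully masked
pred = toy.predict(sample)
print(np.allclose(pred[0, 0], pred[0, 1]))  # True: the state is carried over, not zeroed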
I'm trying to implement the same thing: I want my LSTM sequences to have variable sizes. However, I can't even get your original model to run. I get the following error: TypeError: Layer input_1 does not support masking, but was passed an input_mask: Tensor("time_distributed_1/Reshape_1:0", shape=(?, 100, 100), dtype=bool). I'm using TensorFlow 1.10 and Keras 2.2.2.
I solved the problem by adding a second input: a mask specifying which timesteps the LSTM should take into account. That way the image sequence always has the same number of timesteps, the CNN always generates an output, but some outputs are ignored for the LSTM input. However, the missing images need to be chosen carefully so that batch normalization is not affected.
def LSTM_CNN(params):
    resnet = ResNet50(include_top=False, weights='imagenet', pooling='avg')
    input_layer = Input(shape=(params.numFrames, params.height, params.width, 3))
    input_mask = Input(shape=(params.numFrames, 1))
    curr_layer = TimeDistributed(resnet)(input_layer)
    resnetOutput = Dropout(0.5)(curr_layer)
    curr_layer = multiply([resnetOutput, input_mask])
    cnn_output = curr_layer
    curr_layer = Masking(mask_value=0.0)(curr_layer)
    lstm_out = LSTM(256, dropout=0.5)(curr_layer)
    output = Dense(units=params.numClasses, activation='sigmoid')(lstm_out)
    model = Model([input_layer, input_mask], output)
    return model
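A usage sketch for this two-input model (variable names like batchSize and usedFrames are hypothetical, as are the dummy arrays): the mask carries a 1 for real frames and a 0 for padded ones, so the multiply zeroes the padded CNN outputs and the Masking layer then hides them from the LSTM.

import numpy as np

model = LSTM_CNN(params)
model.compile(optimizer='adam', loss='binary_crossentropy')

frames = np.zeros((batchSize, params.numFrames, params.height, params.width, 3))
mask = np.ones((batchSize, params.numFrames, 1))
mask[:, usedFrames:, :] = 0                 # e.g. everything after `usedFrames` is padding
labels = np.zeros((batchSize, params.numClasses))

model.fit([frames, mask], labels, batch_size=batchSize)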

Create keras callback to save model predictions and targets for each batch during training

I am building a simple Sequential model in Keras (tensorflow backend). During training I want to inspect the individual training batches and model predictions. Therefore, I am trying to create a custom Callback that saves the model predictions and targets for each training batch. However, the model is not using the current batch for prediction, but the entire training data.
How can I hand over only the current training batch to the Callback?
And how can I access the batches and targets that the Callback saves in self.predhis and self.targets?
My current version looks as follows:
callback_list = [prediction_history((self.x_train, self.y_train))]
self.model.fit(self.x_train, self.y_train, batch_size=self.batch_size, epochs=self.n_epochs, validation_data=(self.x_val, self.y_val), callbacks=callback_list)

class prediction_history(keras.callbacks.Callback):
    def __init__(self, train_data):
        self.train_data = train_data
        self.predhis = []
        self.targets = []

    def on_batch_end(self, epoch, logs={}):
        x_train, y_train = self.train_data
        self.targets.append(y_train)
        prediction = self.model.predict(x_train)
        self.predhis.append(prediction)
        tf.logging.info("Prediction shape: {}".format(prediction.shape))
        tf.logging.info("Targets shape: {}".format(y_train.shape))
NOTE: this answer is outdated and only works with TF1. Check @bers's answer for a solution tested on TF2.
After model compilation, the placeholder tensor for y_true is in model.targets and y_pred is in model.outputs.
To save the values of these placeholders at each batch, you can:
First copy the values of these tensors into variables.
Evaluate these variables in on_batch_end, and store the resulting arrays.
Now step 1 is a bit involved, because you'll have to add a tf.assign op to the training function, model.train_function. Using the current Keras API, this can be done by providing a fetches argument to K.function() when the training function is constructed.
In model._make_train_function(), there's a line:
self.train_function = K.function(inputs,
                                 [self.total_loss] + self.metrics_tensors,
                                 updates=updates,
                                 name='train_function',
                                 **self._function_kwargs)
The fetches argument containing the tf.assign ops can be provided via model._function_kwargs (only works after Keras 2.1.0).
As an example:
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import Callback
from keras import backend as K
import tensorflow as tf
import numpy as np

class CollectOutputAndTarget(Callback):
    def __init__(self):
        super(CollectOutputAndTarget, self).__init__()
        self.targets = []  # collect y_true batches
        self.outputs = []  # collect y_pred batches

        # the shape of these 2 variables will change according to batch shape
        # to handle the "last batch", specify `validate_shape=False`
        self.var_y_true = tf.Variable(0., validate_shape=False)
        self.var_y_pred = tf.Variable(0., validate_shape=False)

    def on_batch_end(self, batch, logs=None):
        # evaluate the variables and save them into lists
        self.targets.append(K.eval(self.var_y_true))
        self.outputs.append(K.eval(self.var_y_pred))

# build a simple model
# have to compile first for model.targets and model.outputs to be prepared
model = Sequential([Dense(5, input_shape=(10,))])
model.compile(loss='mse', optimizer='adam')

# initialize the variables and the `tf.assign` ops
cbk = CollectOutputAndTarget()
fetches = [tf.assign(cbk.var_y_true, model.targets[0], validate_shape=False),
           tf.assign(cbk.var_y_pred, model.outputs[0], validate_shape=False)]
model._function_kwargs = {'fetches': fetches}  # use `model._function_kwargs` if using `Model` instead of `Sequential`

# fit the model and check results
X = np.random.rand(10, 10)
Y = np.random.rand(10, 5)
model.fit(X, Y, batch_size=8, callbacks=[cbk])
Unless the number of samples is divisible by the batch size, the final batch will have a different size than the other batches, so K.variable() and K.update() can't be used in this case. You'll have to use tf.Variable(..., validate_shape=False) and tf.assign(..., validate_shape=False) instead.
To verify the correctness of the saved arrays, you can add one line in training.py to print out the shuffled index array:
if shuffle == 'batch':
    index_array = _batch_shuffle(index_array, batch_size)
elif shuffle:
    np.random.shuffle(index_array)

print('Index array:', repr(index_array))  # Add this line

batches = _make_batches(num_train_samples, batch_size)
The shuffled index array should be printed out during fitting:
Epoch 1/1
Index array: array([8, 9, 3, 5, 4, 7, 1, 0, 6, 2])
10/10 [==============================] - 0s 23ms/step - loss: 0.5670
And you can check if cbk.targets is the same as Y[index_array]:
index_array = np.array([8, 9, 3, 5, 4, 7, 1, 0, 6, 2])
print(Y[index_array])
[[ 0.75325592 0.64857277 0.1926653 0.7642865 0.38901153]
[ 0.77567689 0.13573623 0.4902501 0.42897559 0.55825652]
[ 0.33760938 0.68195038 0.12303088 0.83509441 0.20991668]
[ 0.98367778 0.61325065 0.28973401 0.28734073 0.93399794]
[ 0.26097574 0.88219054 0.87951941 0.64887846 0.41996446]
[ 0.97794604 0.91307569 0.93816428 0.2125808 0.94381495]
[ 0.74813435 0.08036688 0.38094272 0.83178364 0.16713736]
[ 0.52609421 0.39218962 0.21022047 0.58569125 0.08012982]
[ 0.61276627 0.20679494 0.24124858 0.01262245 0.0994412 ]
[ 0.6026137 0.25620512 0.7398164 0.52558182 0.09955769]]
print(cbk.targets)
[array([[ 0.7532559 , 0.64857274, 0.19266529, 0.76428652, 0.38901153],
[ 0.77567691, 0.13573623, 0.49025011, 0.42897558, 0.55825651],
[ 0.33760938, 0.68195039, 0.12303089, 0.83509439, 0.20991668],
[ 0.9836778 , 0.61325067, 0.28973401, 0.28734073, 0.93399793],
[ 0.26097575, 0.88219053, 0.8795194 , 0.64887846, 0.41996446],
[ 0.97794604, 0.91307569, 0.93816429, 0.2125808 , 0.94381493],
[ 0.74813437, 0.08036689, 0.38094273, 0.83178365, 0.16713737],
[ 0.5260942 , 0.39218962, 0.21022047, 0.58569127, 0.08012982]], dtype=float32),
array([[ 0.61276627, 0.20679495, 0.24124858, 0.01262245, 0.0994412 ],
[ 0.60261369, 0.25620511, 0.73981643, 0.52558184, 0.09955769]], dtype=float32)]
As you can see, there are two batches in cbk.targets (one "full batch" of size 8 and the final batch of size 2), and the row order is the same as Y[index_array].
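As a small follow-up (a sketch, not part of the original answer), which also covers how to access what the callback has saved: once training is done, the per-batch arrays can simply be concatenated with NumPy.

y_true_all = np.concatenate(cbk.targets, axis=0)   # (10, 5), rows in shuffled batch order
y_pred_all = np.concatenate(cbk.outputs, axis=0)
print(y_true_all.shape, y_pred_all.shape)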
Long edit (almost a new answer) for the following reasons:
Yu-Yang's 2017 answer relies on the private _make_train_function and _function_kwargs APIs, which work only in TF1 (and maybe in TF1 compatibility, so-called non-eager mode).
Similarly, Binyan Hu's 2020 answer relies on _make_test_function and does not work in TF2 by default (requiring non-eager mode as well).
My own Jan 2020 answer, which was already subject to several required configuration settings, seems to have stopped working with (or before) TF 2.5, and I was not able to make model.inputs or model.outputs work any longer.
Finally, the earlier version of this answer requires potentially expensive model evaluation to obtain the predictions for each batch. A similar solution to obtain activation histograms even led to OOM issues with repeated training of different models.
So I set out to find a way to obtain all possible quantities (inputs, targets, predictions, activations), batch-wise, without using any private APIs. The aim was to be able to call .numpy() on the intended quantities, so Keras callbacks can run ordinary Python code to ease debugging (I suppose that is what this question is mainly about - for maximum performance, one would probably try to integrate as many computations as possible into TensorFlow's graph operations anyway).
This is the common base model for all solutions:
"""Demonstrate batch data access."""
import tensorflow as tf
from tensorflow import keras


class DataCallback(keras.callbacks.Callback):
    """This class is where all implementations differ."""


def tf_nan(dtype):
    """Create NaN variable of proper dtype and variable shape for assign()."""
    return tf.Variable(float("nan"), dtype=dtype, shape=tf.TensorShape(None))


def main():
    """Run main."""
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
    callback = DataCallback()

    model.compile(loss="mse", optimizer="adam")
    model.fit(
        x=tf.transpose(tf.range(7.0) + [[0.2], [0.4]]),
        y=tf.transpose(tf.range(7.0) + 10 + [[0.5]]),
        validation_data=(
            tf.transpose(tf.range(11.0) + 30 + [[0.6], [0.7]]),
            tf.transpose(tf.range(11.0) + 40 + [[0.9]]),
        ),
        shuffle=False,
        batch_size=3,
        epochs=2,
        verbose=0,
        callbacks=[callback],
    )
    model.save("tmp.tf")


if __name__ == "__main__":
    main()
The following three snippets show one possible solution each, with their own pros and cons. The core trick is always the same: allocate a tf.Variable and use tf.Variable.assign to export the intended quantity, from some Keras code run in graph mode, into the callback. The methods differ slightly in callback initialization and (in one case) model compilation, and most importantly, in the quantities they can access, which is why I summarize them above each snippet.
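For orientation, here is the core trick in isolation (a minimal sketch, separate from the three full solutions below): a tf.Variable created with an unspecified shape can be assigned from inside graph-mode code and read back eagerly afterwards.

import tensorflow as tf

holder = tf.Variable(float("nan"), dtype=tf.float32, shape=tf.TensorShape(None))

@tf.function
def graph_side(t):
    holder.assign(t)       # runs inside the traced graph, any shape is accepted

graph_side(tf.constant([[1.0, 2.0], [3.0, 4.0]]))
print(holder.numpy())      # the eager side now sees the batch-shaped value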
Custom metric
Using a custom (fake) metric (similar to my Jan 2020 answer), while we cannot seem to access model.inputs nor model.outputs any more (and model.(_)targets does not even exist any longer), we can access y_true and y_pred, which represent the model targets and outputs:
[ ] Inputs/Samples (x)
[ ] Weights (w)
[+] Targets/Labels (y_true)
[+] Outputs/Predictions (y_pred)
[ ] All layers (or only final input/output layers)
"""Demonstrate batch data access using a custom metric."""
import tensorflow as tf
from tensorflow import keras


class DataCallback(keras.callbacks.Callback):  # diff
    """Callback to operate on batch data from metric."""

    def __init__(self):
        """Offer a metric to access batch data."""
        super().__init__()
        self.y_true = None
        self.y_pred = None

    def set_model(self, model):
        """Initialize variables when model is set."""
        self.y_true = tf_nan(model.output.dtype)
        self.y_pred = tf_nan(model.output.dtype)

    def metric(self, y_true, y_pred):
        """Fake metric."""
        self.y_true.assign(y_true)
        self.y_pred.assign(y_pred)
        return 0

    def on_train_batch_end(self, _batch, _logs=None):
        """See keras.callbacks.Callback.on_train_batch_end."""
        print("y_true =", self.y_true.numpy())
        print("y_pred =", self.y_pred.numpy())

    def on_train_end(self, _logs=None):
        """Clean up."""
        del self.y_true, self.y_pred


def tf_nan(dtype):
    """Create NaN variable of proper dtype and variable shape for assign()."""
    return tf.Variable(float("nan"), dtype=dtype, shape=tf.TensorShape(None))


def main():
    """Run main."""
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
    callback = DataCallback()

    model.compile(loss="mse", optimizer="adam", metrics=[callback.metric])  # diff
    model.fit(
        x=tf.transpose(tf.range(7.0) + [[0.2], [0.4]]),
        y=tf.transpose(tf.range(7.0) + 10 + [[0.5]]),
        validation_data=(
            tf.transpose(tf.range(11.0) + 30 + [[0.6], [0.7]]),
            tf.transpose(tf.range(11.0) + 40 + [[0.9]]),
        ),
        shuffle=False,
        batch_size=3,
        epochs=2,
        verbose=0,
        callbacks=[callback],
    )
    model.save("tmp.tf")


if __name__ == "__main__":
    main()
Custom training step
A custom training step is what I used in an earlier version of this answer. The idea still works in principle, but y_pred can be expensive and it might make sense to use a custom metric (see above) if that is required.
[+] Inputs/Samples (x)
[+] Weights (w)
[+] Targets/Labels (y_true)
[~] Outputs/Predictions (y_pred) [expensive!]
[ ] All layers (or only final input/output layers)
"""Demonstrate batch data access using a custom training step."""
import tensorflow as tf
from tensorflow import keras


class DataCallback(keras.callbacks.Callback):  # diff
    """Callback to operate on batch data from training step."""

    def __init__(self):
        """Initialize tf.Variables."""
        super().__init__()
        self.x = None
        self.w = None
        self.y_true = None
        self.y_pred = None

    def set_model(self, model):
        """Wrap the model.train_step function to access training batch data."""
        self.x = tf_nan(model.input.dtype)
        # pylint:disable=protected-access (replace by proper dtype if you know it)
        if model.compiled_loss._user_loss_weights is not None:
            self.w = tf_nan(model.compiled_loss._user_loss_weights.dtype)
        self.y_true = tf_nan(model.output.dtype)
        self.y_pred = tf_nan(model.output.dtype)

        model_train_step = model.train_step

        def outer_train_step(data):
            # https://github.com/keras-team/keras/blob/v2.7.0/keras/engine/training.py
            x, y_true, w = keras.utils.unpack_x_y_sample_weight(data)
            self.x.assign(x)
            if w is not None:
                self.w.assign(w)
            self.y_true.assign(y_true)
            result = model_train_step(data)
            y_pred = model(x)
            self.y_pred.assign(y_pred)
            return result

        model.train_step = outer_train_step

    def on_train_batch_end(self, _batch, _logs=None):
        """See keras.callbacks.Callback.on_train_batch_end."""
        print("x =", self.x.numpy())
        if self.w is not None:
            print("w =", self.w.numpy())
        print("y_true =", self.y_true.numpy())
        print("y_pred =", self.y_pred.numpy())

    def on_train_end(self, _logs=None):
        """Clean up."""
        del self.x, self.w, self.y_true, self.y_pred


def tf_nan(dtype):
    """Create NaN variable of proper dtype and variable shape for assign()."""
    return tf.Variable(float("nan"), dtype=dtype, shape=tf.TensorShape(None))


def main():
    """Run main."""
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
    callback = DataCallback()

    model.compile(loss="mse", optimizer="adam")
    model.fit(
        x=tf.transpose(tf.range(7.0) + [[0.2], [0.4]]),
        y=tf.transpose(tf.range(7.0) + 10 + [[0.5]]),
        validation_data=(
            tf.transpose(tf.range(11.0) + 30 + [[0.6], [0.7]]),
            tf.transpose(tf.range(11.0) + 40 + [[0.9]]),
        ),
        shuffle=False,
        batch_size=3,
        epochs=2,
        verbose=0,
        callbacks=[callback],
    )
    model.save("tmp.tf")


if __name__ == "__main__":
    main()
Custom layer call
A custom layer call is a super-flexible way of accessing each layer's inputs and outputs. The callback handles patching of the call functions for a list of layers. While we cannot access weights and targets (as these quantities do not make sense at the level of individual layers), it allows us to access individual layer activations, which can be handy for questions such as How does one log activations using `tf.keras.callbacks.TensorBoard`?.
[+] Inputs/Samples (x)
[ ] Weights (w)
[ ] Targets/Labels (y_true)
[+] Outputs/Predictions (y_pred)
[+] All layers (or only final input/output layers)
"""Demonstrate batch data access using custom layer calls."""
import tensorflow as tf
from tensorflow import keras


class DataCallback(keras.callbacks.Callback):  # diff
    """Callback to operate on batch data from selected (to be wrapped) layers."""

    def __init__(self, layers):
        """Wrap the calls of an iterable of model layers to access layer batch data."""
        super().__init__()
        self.data = {}
        self.inner_calls = {}
        self.outer_calls = {}

        for layer in layers:
            self.data[layer] = {
                "inputs": tf_nan(layer.input.dtype),
                "outputs": tf_nan(layer.output.dtype),
            }
            self.inner_calls[layer] = layer.call

            def outer_call(inputs, layer=layer, layer_call=layer.call):
                self.data[layer]["inputs"].assign(inputs)
                outputs = layer_call(inputs)
                self.data[layer]["outputs"].assign(outputs)
                return outputs

            self.outer_calls[layer] = outer_call

    def on_train_batch_begin(self, _epoch, _logs=None):
        """Wrap layer calls during each batch."""
        for layer, call in self.outer_calls.items():
            layer.call = call

    def on_train_batch_end(self, _epoch, _logs=None):
        """Restore original layer calls for ModelCheckpoint, model.save, ..."""
        for layer, call in self.inner_calls.items():
            layer.call = call

        for layer, data in self.data.items():
            print("Layer =", layer)
            print("Inputs =", data["inputs"].numpy())
            print("Outputs =", data["outputs"].numpy())


def tf_nan(dtype):
    """Create NaN variable of proper dtype and variable shape for assign()."""
    return tf.Variable(float("nan"), dtype=dtype, shape=tf.TensorShape(None))


def main():
    """Run main."""
    model = keras.Sequential([keras.layers.Dense(1, input_shape=(2,))])
    callback = DataCallback(model.layers)  # diff

    model.compile(loss="mse", optimizer="adam")
    model.fit(
        x=tf.transpose(tf.range(7.0) + [[0.2], [0.4]]),
        y=tf.transpose(tf.range(7.0) + 10 + [[0.5]]),
        validation_data=(
            tf.transpose(tf.range(11.0) + 30 + [[0.6], [0.7]]),
            tf.transpose(tf.range(11.0) + 40 + [[0.9]]),
        ),
        shuffle=False,
        batch_size=3,
        epochs=2,
        verbose=0,
        callbacks=[callback],
    )
    model.save("tmp.tf")


if __name__ == "__main__":
    main()
When to use which and open to-dos
I think the snippets above each solution nicely summarize what each approach is capable of. Generally,
a custom training step will be ideal to access the model input, such as batched dataset generators, effects of shuffling, etc;
a custom layer call is ideal to access the in-betweens of the model; and
a custom metric is ideal to access the outputs of the model.
I am fairly certain (but have not tried) that one can combine all approaches to be able to access all batch quantities simultaneously. I have not tested anything but training mode - each method can have further pros and cons relating to their usefulness in testing or prediction mode. Finally, I assume, but have not tested either, that there should be only minor differences between tf.keras and keras. Having tested this code on TF 2.8.rc1 and Keras 2.8.0, which has moved the tf.keras code back into the keras pip package, and not using any private APIs, I believe this assumption is justified.
It would be great if this approach could be extended to access model.inputs and model.outputs again. Currently, I am getting errors such as this one:
TypeError: You are passing KerasTensor(...), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as tf.cond, tf.function, gradient tapes, or tf.map_fn. Keras Functional model construction only supports TF API calls that do support dispatching, such as tf.math.add or tf.reshape. Other APIs cannot be called directly on symbolic Keras inputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer call and calling that layer on this symbolic input/output.
Previous answer
From TF 2.2 on, you can use custom training steps rather than callbacks to achieve what you want. Here's a demo that works with tensorflow==2.2.0rc1, using inheritance to improve the keras.Sequential model. Performance-wise, this is not ideal as predictions are made twice, once in self(x, training=True) and once in super().train_step(data). But you get the idea.
This works in eager mode and does not use private APIs, so it should be pretty stable. One caveat is that you have to use tf.keras (standalone keras does not support Model.train_step), but I feel standalone keras is becoming more and more deprecated anyway. (In fact, tf.keras migrates to keras in TF2.8.)
"""Demonstrate access to Keras batch tensors in a tf.keras custom training step."""
import numpy as np
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.python.keras.engine import data_adapter

in_shape = (2,)
out_shape = (1,)
batch_size = 3
n_samples = 7


class SequentialWithPrint(keras.Sequential):
    def train_step(self, original_data):
        # Basically copied one-to-one from https://git.io/JvDTv
        data = data_adapter.expand_1d(original_data)
        x, y_true, w = data_adapter.unpack_x_y_sample_weight(data)
        y_pred = self(x, training=True)

        # this is pretty much like on_train_batch_begin
        K.print_tensor(w, "Sample weight (w) =")
        K.print_tensor(x, "Batch input (x) =")
        K.print_tensor(y_true, "Batch output (y_true) =")
        K.print_tensor(y_pred, "Prediction (y_pred) =")

        result = super().train_step(original_data)
        # add anything here for on_train_batch_end-like behavior
        return result


# Model
model = SequentialWithPrint([keras.layers.Dense(out_shape[0], input_shape=in_shape)])
model.compile(loss="mse", optimizer="adam")

# Example data
X = np.random.rand(n_samples, *in_shape)
Y = np.random.rand(n_samples, *out_shape)
model.fit(X, Y, batch_size=batch_size)
print("X: ", X)
print("Y: ", Y)
Finally, here is a simpler example without inheritance:
"""Demonstrate access to Keras batch tensors in a tf.keras custom training step."""
import tensorflow as tf

IN_SHAPE = (2,)
OUT_SHAPE = (1,)
BATCH_SIZE = 3
N_SAMPLES = 7


def make_print_data_and_train_step(keras_model):
    """Return a train_step function that prints data batches."""
    original_train_step = keras_model.train_step

    def print_data_and_train_step(data):
        # Adapted from https://git.io/JvDTv, skipping data_adapter.expand_1d
        x, y_true, w = tf.keras.utils.unpack_x_y_sample_weight(data)
        y_pred = keras_model(x, training=True)

        # this is pretty much like on_train_batch_begin
        tf.keras.backend.print_tensor(w, "Sample weight (w) =")
        tf.keras.backend.print_tensor(x, "Batch input (x) =")
        tf.keras.backend.print_tensor(y_true, "Batch output (y_true) =")
        tf.keras.backend.print_tensor(y_pred, "Prediction (y_pred) =")

        result = original_train_step(data)
        # add anything here for on_train_batch_end-like behavior
        return result

    return print_data_and_train_step


# Model
model = tf.keras.Sequential([tf.keras.layers.Dense(OUT_SHAPE[0], input_shape=IN_SHAPE)])
model.train_step = make_print_data_and_train_step(model)
model.compile(loss="mse", optimizer="adam")

# Example data
X = tf.random.normal((N_SAMPLES, *IN_SHAPE))
Y = tf.random.normal((N_SAMPLES, *OUT_SHAPE))
model.fit(X, Y, batch_size=BATCH_SIZE)
print("X: ", X)
print("Y: ", Y)
Update: This approach has stopped working. See my other answer for a number of solutions compatible with TF2.8 (and hopefully beyond).
One problem with @Yu-Yang's solution is that it relies on model._function_kwargs, which is not guaranteed to work as it is not part of the API. In particular, in TF2 with eager execution, session kwargs seem to be either not accepted at all or run preemptively due to eager mode.
Therefore, here is my solution tested on tensorflow==2.1.0. The trick is to replace fetches by a Keras metric, in which the assignment operations from fetches are made during training.
This even enables a Keras-only solution if the batch size divides the number of samples; otherwise, another trick has to be applied when initializing TensorFlow variables with a None shape, similar to validate_shape=False in earlier solutions (compare https://github.com/tensorflow/tensorflow/issues/35667).
Importantly, tf.keras behaves differently from keras (sometimes just ignoring assignments, or seeing variables as Keras symbolic tensors), so this updated solution takes care of both implementations (Keras==2.3.1 and tensorflow==2.1.0).
"""Demonstrate access to Keras symbolic tensors in a (tf.)keras.Callback."""
import numpy as np
import tensorflow as tf

use_tf_keras = True
if use_tf_keras:
    from tensorflow import keras
    from tensorflow.keras import backend as K

    tf.config.experimental_run_functions_eagerly(False)
    compile_kwargs = {"run_eagerly": False, "experimental_run_tf_function": False}
else:
    import keras
    from keras import backend as K

    compile_kwargs = {}

in_shape = (2,)
out_shape = (1,)
batch_size = 3
n_samples = 7


class CollectKerasSymbolicTensorsCallback(keras.callbacks.Callback):
    """Collect Keras symbolic tensors."""

    def __init__(self):
        """Initialize intermediate variables for batches and lists."""
        super().__init__()
        # Collect batches here
        self.inputs = []
        self.targets = []
        self.outputs = []

        # # For a pure Keras solution, we need to know the shapes beforehand;
        # # in particular, batch_size must divide n_samples:
        # self.input = K.variable(np.empty((batch_size, *in_shape)))
        # self.target = K.variable(np.empty((batch_size, *out_shape)))
        # self.output = K.variable(np.empty((batch_size, *out_shape)))

        # If the shape of these variables will change (e.g., last batch), initialize
        # arbitrarily and specify `shape=tf.TensorShape(None)`:
        self.input = tf.Variable(0.0, shape=tf.TensorShape(None))
        self.target = tf.Variable(0.0, shape=tf.TensorShape(None))
        self.output = tf.Variable(0.0, shape=tf.TensorShape(None))

    def on_batch_end(self, batch, logs=None):
        """Evaluate the variables and save them into lists."""
        self.inputs.append(K.eval(self.input))
        self.targets.append(K.eval(self.target))
        self.outputs.append(K.eval(self.output))

    def on_train_end(self, logs=None):
        """Print all variables."""
        print("Inputs: ", *self.inputs)
        print("Targets: ", *self.targets)
        print("Outputs: ", *self.outputs)


@tf.function
def assign_keras_symbolic_tensors_metric(_foo, _bar):
    """
    Return the assignment operations as a metric to have them evaluated by Keras.

    This replaces `fetches` from the TF1/non-eager-execution solution.
    """
    # Collect assignments as list of (dest, src)
    assignments = (
        (callback.input, model.inputs[0]),
        (callback.target, model._targets[0] if use_tf_keras else model.targets[0]),
        (callback.output, model.outputs[0]),
    )
    for (dest, src) in assignments:
        dest.assign(src)
    return 0


callback = CollectKerasSymbolicTensorsCallback()
metrics = [assign_keras_symbolic_tensors_metric]

# Example model
model = keras.Sequential([keras.layers.Dense(out_shape[0], input_shape=in_shape)])
model.compile(loss="mse", optimizer="adam", metrics=metrics, **compile_kwargs)

# Example data
X = np.random.rand(n_samples, *in_shape)
Y = np.random.rand(n_samples, *out_shape)
model.fit(X, Y, batch_size=batch_size, callbacks=[callback])
print("X: ", X)
print("Y: ", Y)
Inspired by the way tf.keras.callbacks.TensorBoard saves v1 (graph) summaries.
No variable assignments and no redundant metrics.
For use with tensorflow>=2.0.0, in graph mode (eager execution disabled) during evaluation.
Extensive operations on the numpy predictions can be implemented by overriding SavePrediction._pred_callback.
import numpy as np
import tensorflow as tf
from tensorflow import keras

tf.compat.v1.disable_eager_execution()

in_shape = (2,)
out_shape = (1,)
batch_size = 2
n_samples = 32


class SavePrediction(keras.callbacks.Callback):
    def __init__(self):
        super().__init__()
        self._get_pred = None
        self.preds = []

    def _pred_callback(self, preds):
        self.preds.append(preds)

    def set_model(self, model):
        super().set_model(model)
        if self._get_pred is None:
            self._get_pred = self.model.outputs[0]

    def on_test_begin(self, logs):
        # pylint: disable=protected-access
        self.model._make_test_function()
        # pylint: enable=protected-access
        if self._get_pred not in self.model.test_function.fetches:
            self.model.test_function.fetches.append(self._get_pred)
        self.model.test_function.fetch_callbacks[self._get_pred] = self._pred_callback

    def on_test_end(self, logs):
        if self._get_pred in self.model.test_function.fetches:
            self.model.test_function.fetches.remove(self._get_pred)
        if self._get_pred in self.model.test_function.fetch_callbacks:
            self.model.test_function.fetch_callbacks.pop(self._get_pred)
        print(self.preds)


model = keras.Sequential([
    keras.layers.Dense(out_shape[0], input_shape=in_shape)
])
model.compile(loss="mse", optimizer="adam")

X = np.random.rand(n_samples, *in_shape)
Y = np.random.rand(n_samples, *out_shape)
model.evaluate(X, Y,
               batch_size=batch_size,
               callbacks=[SavePrediction()])
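A sketch of the override mentioned above (the subclass name and the printed statistic are hypothetical): post-process each batch of predictions as they are fetched instead of only collecting them.

class SavePredictionStats(SavePrediction):
    def _pred_callback(self, preds):
        super()._pred_callback(preds)
        print("batch prediction mean:", preds.mean())

model.evaluate(X, Y,
               batch_size=batch_size,
               callbacks=[SavePredictionStats()])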