How to use tensorflow function recompute_grad in Keras - tensorflow

I'm trying to reduce the GPU memory cost of DenseNet. I found that someone used tf.contrib.layers.recompute_grad to accomplish this in TensorFlow. I have a Keras model and I want to use tf.contrib.layers.recompute_grad in Keras. I tried to wrap it in a custom Keras layer.
class Back_Recompute(Layer):
    """Keras layer that runs a bias-free Conv2D through ``recompute_grad``.

    Args:
        filters: number of output channels of the convolution.
        kernel_size: kernel size forwarded to ``Conv2D``.
        w_decay: L2 weight-decay factor for the convolution kernel.
        **kwargs: forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, filters, kernel_size, w_decay, **kwargs):
        super(Back_Recompute, self).__init__(**kwargs)
        self.n_filters = filters
        self.we_decay = w_decay
        self.ks = kernel_size

    def call(self, ip):
        """Apply the recomputed convolution to ``ip`` and return its output."""
        # The module-level counter gives every call its own variable scope.
        global brcount
        scope_name = 'BR_{}'.format(brcount)
        with tf.variable_scope(scope_name, use_resource=True):
            def _conv(inner_ip):
                # NOTE(review): a fresh Conv2D is instantiated on every
                # invocation of this function - confirm that is intended.
                conv = Conv2D(
                    self.n_filters,
                    self.ks,
                    kernel_initializer='he_normal',
                    padding='same',
                    use_bias=False,
                    kernel_regularizer=l2(self.we_decay),
                )
                return conv(inner_ip)

            brcount += 1
            recomputed = tf.contrib.layers.recompute_grad(_conv)
            return recomputed(ip)

    def compute_output_shape(self, input_shape):
        """Return the input shape with its fourth entry replaced by the filter count."""
        batch, dim_1, dim_2 = input_shape[0], input_shape[1], input_shape[2]
        return (batch, dim_1, dim_2, self.n_filters)
But the weird problem is that my training code gets stuck in fit_generator: it never proceeds, and there is no error message. I then thought this was probably because of the Keras layer, so I changed the Keras layer Conv2D to tf.layers.conv2d. When I run the training code, I get an error message: AttributeError: 'Activation' object has no attribute 'outbound_nodes'. I don't know how to use tf.contrib.layers.recompute_grad in Keras. Can someone help me?

Related

Custom layer in tensorflow to output the running maximum of its inputs

I am trying to create a custom layer in tensorflow to output the running maximum of its inputs. The layer has a memory variable and comparison function. I wrote the following
class ComputeMax(tf.keras.layers.Layer):
    """Layer that remembers the element-wise maximum of the inputs it has seen."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the non-trainable memory variable, zero-initialised."""
        # NOTE(review): the whole ``input_shape`` is used here; if it carries
        # an unknown (None) batch entry, ``tf.zeros`` cannot materialise it -
        # confirm against the caller.
        initial = tf.zeros((input_shape))
        self.maxval = tf.Variable(initial_value=initial, trainable=False)

    def call(self, inputs):
        """Fold ``inputs`` into the stored maximum and return the variable."""
        updated = tf.maximum(inputs, self.maxval)
        self.maxval.assign(updated)
        return self.maxval
# Eager-mode smoke test: feed the same all-ones tensor twice and print the
# layer output after each call.
my_sum = ComputeMax()
x = tf.ones((1, 2))
for _ in range(2):
    y = my_sum(x)
    print(y.numpy())  # [1, 1]
It works as above. When I try it in a test model:
# Minimal model used to exercise ComputeMax behind a Dense layer.
model = Sequential()
# ``(2)`` is just the int 2; a one-element tuple states the shape explicitly.
model.add(tf.keras.Input(shape=(2,)))
model.add(Dense(1, activation='relu'))
model.add(ComputeMax())
model.compile(optimizer='adam', loss='mse')
I get the error on compile:
ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 1)
What am I missing?
Actually, the layer needs to know the input neurons from the previous layer, which is the last value in input_shape. You are using input_shape as it is which is actually batch shape, leading to a layer of the shape of batch.
This implementation might help.
class ComputeMax(tf.keras.layers.Layer):
    """Track the per-feature running maximum of every input seen so far.

    The memory variable has one slot per feature (the last axis of the
    input), so it can be created even when the batch size is unknown.
    ``call`` returns that variable, i.e. a tensor of shape ``(features,)``.
    """

    def __init__(self):
        super(ComputeMax, self).__init__()

    def build(self, input_shape):
        """Create the non-trainable memory, sized by the last input axis."""
        # NOTE(review): starting from zeros means all-negative inputs never
        # move the maximum below 0 - confirm this is acceptable.
        self.maxval = tf.Variable(
            initial_value=tf.zeros((input_shape[-1],)),
            trainable=False,
        )

    def call(self, inputs):
        """Update the running maximum with ``inputs`` and return it."""
        # Collapse every leading (batch-like) axis first: assigning the raw
        # (batch, features) result to the (features,) variable would fail
        # with a shape mismatch.
        flat = tf.reshape(inputs, (-1, self.maxval.shape[-1]))
        batch_max = tf.reduce_max(flat, axis=0)
        self.maxval.assign(tf.maximum(batch_max, self.maxval))
        return self.maxval
But probably it won't give you answers with numpy 1d array.

TensorFlow 2.3: load model from ModelCheckPoint callback with both custom layers and model

I have written custom code to build a UNet architecture. To do so, I first subclassed the tf.keras.layers.Layer object to define an encoder convolutional block composed of a Conv3D layer, a BatchNormalization layer and an Activation layer; similarly, I defined a decoder inverse convolutional block composed of a Conv3DTranspose layer, a BatchNormalization layer, an Activation layer and a Concatenate layer. Finally, I subclassed the tf.keras.Model object to define the full model, composed of 4 encoding blocks and 4 decoding blocks.
To checkpoint the model while training I have used the tf.keras.callbacks.ModelCheckpoint callback. However when a I try to load back the model (that in fact is still training) with tf.keras.models.load_model() I receive the following error: ValueError: No model found in config file.
Here the full code for the model definition, building and fitting:
import tensorflow as tf
# Encoder block
class ConvBlock(tf.keras.layers.Layer):
    """Encoder block: Conv3D -> BatchNormalization -> ReLU.

    Args:
        n_filters: number of output filters of the convolution.
        conv_size: kernel size of the convolution.
        conv_stride: strides of the convolution.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, n_filters, conv_size, conv_stride, **kwargs):
        super(ConvBlock, self).__init__(**kwargs)
        # Kept so that get_config() can rebuild the block.
        self.n_filters = n_filters
        self.conv_size = conv_size
        self.conv_stride = conv_stride
        self.conv3D = tf.keras.layers.Conv3D(
            filters=n_filters,
            kernel_size=conv_size,
            strides=conv_stride,
            padding="same",
        )
        self.batch_norm = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.Activation("relu")

    def call(self, inputs, training=None):
        """Run the block; ``training`` selects the batch-norm mode."""
        h = self.conv3D(inputs)
        # Always apply batch norm and let the flag pick batch vs. moving
        # statistics. Skipping the layer outside training changed the
        # network at inference and left it unbuilt after model.build().
        h = self.batch_norm(h, training=training)
        return self.relu(h)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(ConvBlock, self).get_config()
        config.update(
            {
                "n_filters": self.n_filters,
                "conv_size": self.conv_size,
                "conv_stride": self.conv_stride,
            }
        )
        return config
# Decoder block
class InvConvBlock(tf.keras.layers.Layer):
    """Decoder block: Conv3DTranspose -> BatchNormalization -> activation,
    optionally followed by a channel-wise concatenation with a skip tensor.

    Args:
        n_filters: number of output filters of the transposed convolution.
        conv_size: kernel size of the transposed convolution.
        conv_stride: strides of the transposed convolution.
        activation: activation identifier passed to ``Activation``.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, n_filters, conv_size, conv_stride, activation, **kwargs):
        super(InvConvBlock, self).__init__(**kwargs)
        # Kept so that get_config() can rebuild the block.
        self.n_filters = n_filters
        self.conv_size = conv_size
        self.conv_stride = conv_stride
        self.activation = activation
        self.conv3D_T = tf.keras.layers.Conv3DTranspose(
            filters=n_filters,
            kernel_size=conv_size,
            strides=conv_stride,
            padding="same",
        )
        self.batch_norm = tf.keras.layers.BatchNormalization()
        self.activ = tf.keras.layers.Activation(activation)
        self.concat = tf.keras.layers.Concatenate(axis=-1)

    def call(self, inputs, feat_concat=None, training=None):
        """Run the block; concatenate ``feat_concat`` on the last axis if given."""
        h = self.conv3D_T(inputs)
        # Always apply batch norm and let the flag pick batch vs. moving
        # statistics. Skipping the layer outside training changed the
        # network at inference and left it unbuilt after model.build().
        h = self.batch_norm(h, training=training)
        h = self.activ(h)
        if feat_concat is not None:
            h = self.concat([h, feat_concat])
        return h

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(InvConvBlock, self).get_config()
        config.update(
            {
                "n_filters": self.n_filters,
                "conv_size": self.conv_size,
                "conv_stride": self.conv_stride,
                "activation": self.activation,
            }
        )
        return config
class UNet(tf.keras.Model):
    """3D U-Net made of four ``ConvBlock`` encoders and four ``InvConvBlock`` decoders.

    Args:
        n_filters: filter count of the first encoder block; later blocks
            use 2x, 4x and 8x this value.
        e_size: kernel size shared by all encoder blocks.
        e_stride: stride of the first two encoder blocks.
        d_size: kernel size shared by all decoder blocks.
        d_stride: stride of the last two decoder blocks.
        **kwargs: forwarded to ``tf.keras.Model``.
    """

    def __init__(self, n_filters, e_size, e_stride, d_size, d_stride, **kwargs):
        super().__init__(**kwargs)
        unit_stride = (1, 1, 1)

        # Encoder: the two deepest blocks keep the spatial resolution.
        self.conv_block_1 = ConvBlock(n_filters, e_size, e_stride)
        self.conv_block_2 = ConvBlock(n_filters * 2, e_size, e_stride)
        self.conv_block_3 = ConvBlock(n_filters * 4, e_size, unit_stride)
        self.conv_block_4 = ConvBlock(n_filters * 8, e_size, unit_stride)

        # Decoder: mirrors the encoder; the last block emits one sigmoid channel.
        self.inv_conv_block_1 = InvConvBlock(n_filters * 4, d_size, unit_stride, "relu")
        self.inv_conv_block_2 = InvConvBlock(n_filters * 2, d_size, unit_stride, "relu")
        self.inv_conv_block_3 = InvConvBlock(n_filters, d_size, d_stride, "relu")
        self.inv_conv_block_4 = InvConvBlock(1, d_size, d_stride, "sigmoid")

    def call(self, inputs, **kwargs):
        """Encode ``inputs``, then decode while concatenating the skip features.

        Any keyword arguments are passed unchanged to every block.
        """
        skip_1 = self.conv_block_1(inputs, **kwargs)
        skip_2 = self.conv_block_2(skip_1, **kwargs)
        skip_3 = self.conv_block_3(skip_2, **kwargs)
        bottleneck = self.conv_block_4(skip_3, **kwargs)

        up = self.inv_conv_block_1(bottleneck, feat_concat=skip_3, **kwargs)
        up = self.inv_conv_block_2(up, feat_concat=skip_2, **kwargs)
        up = self.inv_conv_block_3(up, feat_concat=skip_1, **kwargs)
        # The final block has no skip connection.
        return self.inv_conv_block_4(up, **kwargs)
# Build, compile and fit the U-Net, checkpointing once per epoch.
model = UNet(
    n_filters,
    e_size,
    e_stride,
    d_size,
    d_stride,
)
model.build((None, *input_shape, 1))
# The last decoder block already applies a sigmoid, so the loss receives
# probabilities, not logits.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate)
metrics = [tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=metrics,
)
CP_callback = tf.keras.callbacks.ModelCheckpoint(
    f"{checkpoint_dir}/model.h5", save_freq='epoch', monitor="loss"
)
# The model is bound to ``model``; ``unet`` was never defined in this script.
model.fit(
    data,
    epochs=opts.epochs,
    callbacks=[CP_callback],
)
To load the model I used the following code on another python console:
import tensorflow as tf
model = tf.keras.models.load_model(f'{checkpoint_dir}/model.h5')
but here I receive the above mentioned error. What am I missing? Or what am I doing wrong?
Thank you in advance for your help.
This is because you don't define the get_config method in your custom layers. For this, check this existing answer on SO.
Otherwise, you can save the trained weights (not the full model) and load the model as follows. In that case, you don't need to define this function. Please note, it's good practice to do, however. Here is a workaround for your problem:
# callback
# callback: keep only the weights of the best epoch (lowest val_loss)
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    'model.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
    save_weights_only=True,  # <---- only save weight
)

# train
model = UNet(
    n_filters,
    e_size,
    e_stride,
    d_size,
    d_stride,
)
model.compile(...)
# The callback has to be handed to fit(), otherwise nothing is ever saved.
model.fit(..., callbacks=[checkpoint_cb])

# inference: rebuild the same architecture, then restore the weights
model = UNet(
    n_filters,
    e_size,
    e_stride,
    d_size,
    d_stride,
)
model.build((None, *input_shape, 1))
model.load_weights('model.h5')
For more details, see the documentation of Serialization and saving and also collab demonstration of François Chollet. Also, We've written an article about model subclassing and custom training stuff in tf 2.x, in the Save and Load section (at the bottom) of this article, we've demonstrated many strategies, here, hope that help.
Update
I've run your public colab notebook. Unfortunately, I am facing the same issue, and it's a bit weird and currently, I don't have the exact answer for saving the entire model in the ModelCheckpoint callback with Custom Layer even if we define the get_config() method.
However, there is another workaround that may come in handy for you. As we know, there are two major ways to save tf models: the SavedModel format and the HDF5 format. Here we choose the SavedModel format, which is the recommended one and is safe to use.
The key difference between HDF5 and SavedModel is that HDF5 uses object configs to save the model architecture, while SavedModel saves the execution graph. Thus, SavedModels are able to save custom objects like subclassed models and custom layers without requiring the original code.
Now, as for your requirements, you are saving the entire model along with the best loss or val_loss at training time. For that, we can define a custom callback to save the model with the lowest validation loss (or whatever you want), as follows:
class SaveModelH5(tf.keras.callbacks.Callback):
    """Save the whole model (SavedModel format) whenever the monitored
    metric reaches a new minimum.

    Args:
        filepath: directory passed to ``model.save``; defaults to ``'unet'``.
        monitor: key looked up in the epoch logs; defaults to ``'val_loss'``.
    """

    def __init__(self, filepath='unet', monitor='val_loss'):
        super(SaveModelH5, self).__init__()
        self.filepath = filepath
        self.monitor = monitor
        self.val_loss = []

    def on_train_begin(self, logs=None):
        # Start every training run with an empty history.
        self.val_loss = []

    def on_epoch_end(self, epoch, logs=None):
        current_val_loss = (logs or {}).get(self.monitor)
        if current_val_loss is None:
            # The metric is absent (e.g. no validation data): nothing to
            # compare, and None cannot be ordered against floats.
            return
        self.val_loss.append(current_val_loss)
        # The current value is already in the history, so this holds exactly
        # when it is the smallest value seen so far (ties included).
        if current_val_loss <= min(self.val_loss):
            print('Find lowest val_loss. Saving entire model.')
            self.model.save(self.filepath, save_format='tf')  # < ----- Here
save_model = SaveModelH5()
# ``...`` stands for the data/epoch arguments; callbacks are passed as a list.
unet.fit(..., callbacks=[save_model])
Using
# 'tf' selects the SavedModel format; it must be a quoted string.
model.save('any_name', save_format='tf')
allows us to create an any_name working directory, which contains assets, saved_model.pb, and variables. The model architecture and training configuration, including the optimizer, losses, and metrics, are stored in saved_model.pb. The weights are saved in the variables directory.
When saving the model and its layers, the SavedModel format stores the class name, call function, losses, and weights (and the config, if implemented). The call function defines the computation graph of the model/layer. In the absence of the model/layer config, the call function is used to create a model that exists like the original model which can be trained, evaluated, and used for inference. When we need to re-load the saved model, we can do as follows:
new_unet = tf.keras.models.load_model("unet", compile=False)
Colab.

Flag for training and test for custom layer in Keras

I want to create a custom keras layer which does something during training and something else for validation or testing.
from tensorflow import keras
K = keras.backend
from keras.layers import Layer
import tensorflow as tf
class MyCustomLayer(Layer):
    """Layer meant to multiply its input by 4 in training and by 0 otherwise.

    Args:
        ratio: stored on the instance; not used by ``call``.
        **kwargs: forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, ratio=0.5, **kwargs):
        self.ratio = ratio
        super().__init__(**kwargs)

    # (a tf.function decorator was left commented out here)
    def call(self, x, is_training=None):
        """Scale ``x`` according to the backend learning phase."""
        # The argument is overwritten by the backend flag.
        # NOTE(review): Keras layers conventionally receive this flag as
        # ``training`` - confirm the parameter name against the caller.
        is_training = K.learning_phase()
        tf.print("training: ", is_training)
        # NOTE(review): identity checks against 1/True only succeed for plain
        # Python values, not for symbolic tensors - confirm what
        # K.learning_phase() returns here.
        in_training = is_training is 1 or is_training is True
        factor = 4 if in_training else 0
        return x * factor
# Small classifier with the custom layer right after the first Dense layer.
stack = [
    Dense(16, input_dim=input_dim),
    MyCustomLayer(0.5),
    ReLU(),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(output_dim, activation='softmax', kernel_regularizer=l2(0.01)),
]
model = Sequential()
for layer in stack:
    model.add(layer)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(X_train, y_train, validation_split=0.05, epochs=5)
In the output I always get:
training: 0
training: 0
training: 0
training: 0
training: 0
training: 0
training: 0
training: 0
Does anyone knows how to fix this?
There are some issues and misconceptions here. First you are mixing imports between keras and tf.keras imports, you should use only one of them. Second the parameter for call is called training, not is_training.
I think the issue is that tf.print does not really print the value of the training variable as its a tensorflow symbolic variable and might change value indirectly. There are other ways to check if the layer behaves differently during inference and training, for example:
class MyCustomLayer(Layer):
    """Multiply the input by 4 during training and by 0 during inference.

    Args:
        ratio: kept on the instance for callers that read it; ``call`` does
            not use it.
        **kwargs: forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, ratio=0.5, **kwargs):
        super(MyCustomLayer, self).__init__(**kwargs)
        # Previously accepted but silently dropped.
        self.ratio = ratio

    def call(self, inputs, training=None):
        """Return the training or inference branch, chosen by ``training``."""
        train_x = inputs * 4
        test_x = inputs * 0
        # Both branches are built; the backend selects one from the flag.
        return K.in_train_phase(train_x,
                                test_x,
                                training=training)
Using then this model:
# Two-layer demo model: one Dense unit followed by the custom layer.
model = Sequential()
for layer in (Dense(1, input_dim=10), MyCustomLayer(0.5)):
    model.add(layer)
model.compile(loss='mse', optimizer='adam')
And making an instance of a function that explictly receives the K.learning_phase() variable:
fun = K.function([model.input, K.learning_phase()], [model.output])
If you call it with K.learning_phase() set to 1 or 0 you do see different outputs:
d = np.random.random(size=(2,10))
print(fun([d, 1]))
print(fun([d, 0]))
Result:
[array([[4.1759257], [3.9988194]], dtype=float32)]
[array([[0.], [0.]], dtype=float32)]
And this indicates that the layer has different behavior during training and inference/testing.
So, I just figured out what was going wrong. I was mixing two different types of classes:
from keras import Sequential
from tensorflow import keras
K = keras.backend
So, the model is using keras and I was calling the flag from tensorflow.keras. For this reason K.learning_phase() was not working as expected.
To fix it I used
from tensorflow.keras import Sequential
from tensorflow import keras
K = keras.backend

Implementation of BERT in keras with TF_HUB

I was trying to implement the Google Bert model in tensorflow-keras using tensorflow hub. For this I designed a custom keras layer "Bertlayer" . Now the problem is when I am compiling the keras model it keeps showing that
AttributeError: 'Bertlayer' object has no attribute '_keras_style'
I don't know where I am wrong and what _keras_style attribute is.Please help to find the error in the code.
This is the github link to the full code: https://github.com/PradyumnaGupta/BERT/blob/master/Untitled21.ipynb
class BertLayer(tf.layers.Layer):
    """Wrap a TF-Hub BERT module and expose its pooled output.

    NOTE(review): the base class is ``tf.layers.Layer``; models built with
    Keras may expect ``tf.keras.layers.Layer`` instead - confirm.

    Args:
        n_fine_tune_layers: how many of the module's trailing variables
            (after dropping the "/cls/" ones) are registered as trainable.
        **kwargs: forwarded to the base layer constructor.
    """

    def __init__(self, n_fine_tune_layers=10, **kwargs):
        self.n_fine_tune_layers = n_fine_tune_layers
        self.trainable = True
        self.output_size = 768
        super(BertLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Load the hub module and split its variables into trainable / frozen."""
        module_name = "{}_module".format(self.name)
        self.bert = hub.Module(bert_path, trainable=self.trainable, name=module_name)

        # Drop the variables whose name contains "/cls/", then keep only the
        # last ``n_fine_tune_layers`` of the remainder for fine-tuning.
        candidates = [v for v in self.bert.variables if "/cls/" not in v.name]
        fine_tuned = candidates[-self.n_fine_tune_layers:]
        self._trainable_weights.extend(fine_tuned)

        # Everything that was not selected above is registered as frozen.
        for variable in self.bert.variables:
            if variable not in self._trainable_weights:
                self._non_trainable_weights.append(variable)

        super(BertLayer, self).build(input_shape)

    def call(self, inputs):
        """Cast the three inputs to int32 and return the module's pooled output."""
        input_ids, input_mask, segment_ids = [
            K.cast(tensor, dtype="int32") for tensor in inputs
        ]
        bert_inputs = {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": segment_ids,
        }
        outputs = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)
        return outputs["pooled_output"]

    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], output_size)``."""
        leading = input_shape[0]
        return (leading, self.output_size)
So, tensorflow version 1.* is a bit misleading. It actually has 2 bases classes called Layer. One - the one that you are using. It is intended to implement shortcut wrappers over regular TF operations. The other from tensorflow.keras.layers import Layer is for Keras-like models and sequencies.
Judging by your error, you are using keras/models to train further.
You should probably start by deriving your layer from keras.layers.Layer instead of tf.layers.Layer.

Tensorflow dense layers worse than keras sequential

I try to train an agent on the inverse-pendulum (similar to cart-pole) problem, which is a benchmark of reinforcement learning. I use neural-fitted-Q-iteration algorithm which uses a multi-layer neural network to evaluate the Q function.
I use Keras.Sequential and tf.layers.dense, respectively, to build the neural network, and leave everything else the same. However, Keras gives me good results and TensorFlow does not. In fact, TensorFlow doesn't work at all: its loss keeps increasing and the agent learns nothing from the training.
Here I present the code for Keras as follows
def build_model():
    """Build and compile the 3-input Q-network: 5 -> 5 -> 1 units, each
    followed by a sigmoid, trained with MSE and Adam (lr 1e-3)."""
    model = Sequential()
    # (constructor, activation) pairs are added in order, one after the other.
    dense_layers = (Dense(5, input_dim=3), Dense(5), Dense(1))
    for dense in dense_layers:
        model.add(dense)
        model.add(Activation('sigmoid'))
    model.compile(loss='mean_squared_error', optimizer=Adam(lr=1E-3))
    return model
and the tensorflow version is
class NFQ_fit(object):
    """Neural-network approximator used by NFQ iteration.

    Builds a graph with two 5-unit sigmoid hidden layers and a 1-unit
    sigmoid output, plus a mean-squared-error loss minimised with Adam.
    """

    def __init__(self, sess, N_feature, learning_rate=1E-3, batch_size=100):
        self.sess = sess
        self.N_feature = N_feature
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        def sigmoid_dense(layer_input, width, layer_name):
            # All three layers share these settings; only the input, the
            # width and the name change.
            return tf.layers.dense(
                inputs=layer_input,
                units=width,
                activation=tf.sigmoid,
                use_bias=True,
                kernel_initializer=tf.truncated_normal_initializer(0.0, 1E-2),
                bias_initializer=tf.constant_initializer(0.0),
                kernel_regularizer=tf.contrib.layers.l2_regularizer(1E-4),
                name=layer_name,
            )

        # DNN structure
        self.inputs = tf.placeholder(tf.float32, [None, N_feature], 'inputs')
        self.labels = tf.placeholder(tf.float32, [None, 1], 'labels')
        self.l1 = sigmoid_dense(self.inputs, 5, 'hidden-layer-1')
        self.l2 = sigmoid_dense(self.l1, 5, 'hidden-layer-2')
        self.outputs = sigmoid_dense(self.l2, 1, 'outputs')

        # optimization
        residual = self.labels - self.outputs
        self.mean_loss = tf.reduce_mean(tf.square(residual))
        # The regularization term is collected but not added to the loss.
        self.regularization_loss = tf.losses.get_regularization_loss()
        self.loss = self.mean_loss
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.train_op = optimizer.minimize(self.loss)
The two models are the same. Both of them has two hidden layers with the same dimension. I expect that the problems may come from the kernel initialization but I don't know how to fix it.
Using Keras is great. If you want better TensorFlow integration check out tf.keras. There's no particular reason to use tf.layers if the Keras (or tf.keras) defaults work better.
In this case glorot_uniform looks like the default initializer. This is also the global TensorFlow default, so consider removing the kernel_initializer argument instead of the explicit truncated normal initialization in your question (or passing Glorot explicitly).