How can I train the parameters of Class1 and Class2 together, i.e. the weights of self.linear1 and self.linear2 from Class1 along with the weight of Class2? Class1 instantiates Class2 as self.conv1 = Class2(w_in, w_out), so the two are interlinked and form a chain during the forward pass. That is why I wish to train them together. What should I pass in my training loop when calculating the gradients? grads = tape.gradient(loss, ? )
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class Class1(layers.Layer):
    """Layer that owns a ``Class2`` sub-layer and two dense layers.

    Args:
        num_channels: Channel count; used to size the declared input of
            ``linear1`` (``w_out * num_channels``).
        w_in: First argument forwarded to ``Class2``.
        w_out: Second argument forwarded to ``Class2``; also the number of
            units of ``linear1``.
        num_class: Number of units of ``linear2``.
    """

    def __init__(self, num_channels, w_in, w_out, num_class):
        super(Class1, self).__init__()
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        # ``linear2`` below reads ``self.num_class``, so it has to be stored;
        # without this assignment the constructor raises AttributeError.
        self.num_class = num_class
        self.conv1 = Class2(w_in, w_out)
        self.linear1 = tf.keras.layers.Dense(
            self.w_out,
            input_shape=(self.w_out * self.num_channels,),
            activation=None,
        )
        self.linear2 = tf.keras.layers.Dense(
            self.num_class,
            input_shape=(self.w_out,),
            activation=None,
        )

    def call(self, A):
        """Apply ``conv1`` to ``A`` and return the result.

        NOTE(review): ``linear1`` and ``linear2`` are created in ``__init__``
        but never applied here, so they do not take part in the forward
        pass -- confirm whether they should be chained after ``conv1``.
        """
        a = self.conv1(A)
        return a
class Class2(tf.keras.layers.Layer):
    """Weights the input with a softmax over the kernel and sums over axis 1.

    Args:
        in_channels: Size of the kernel's second dimension.
        out_channels: Size of the kernel's first dimension.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        kernel_shape = (out_channels, in_channels, 1, 1)
        self.weight = self.add_weight(
            shape=kernel_shape,
            initializer="random_normal",
            trainable=True,
        )

    def call(self, A):
        """Return ``sum(A * softmax(weight, axis=1), axis=1)``, printing input and output."""
        print(A)
        # Softmax is taken along axis 1 of the kernel before weighting the input.
        mixing = tf.nn.softmax(self.weight, 1)
        A = tf.reduce_sum(A * mixing, 1)
        print(A)
        return A
I want to train a custom, stateful RNN, but I cannot get it to work over batches. I followed the documentation of tf.keras.layers.RNN, but it still does not work.
Stand-alone code:
import tensorflow as tf
class RecurrentCell(tf.keras.layers.Layer):
    """Minimal RNN cell: adds one trainable row vector to the input of each step.

    The recurrent state is passed through unchanged.

    Args:
        units: Size of the state (stored as ``units`` and ``state_size``).
        **kwargs: Forwarded to ``tf.keras.layers.Layer.__init__``.
    """

    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        self.output_size = 5
        super(RecurrentCell, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``(1, output_size)`` weight that is added to every input."""
        self.training_params = self.add_weight(
            shape=(1, self.output_size),
            initializer='uniform',
            name='kernel',
        )
        self.built = True

    def call(self, inputs, states):
        """Return ``inputs + training_params`` and the unchanged state.

        ``states`` is indexed as a sequence holding the single state tensor
        (assumes the RNN wrapper passes it that way -- TODO confirm). It is
        returned as a flat one-element list instead of being wrapped in an
        additional list.
        """
        previous_state = states[0]
        output = inputs + self.training_params
        return output, [previous_state]

    def get_initial_state(self, inputs=None, batch_size=10, dtype=tf.float32):
        """Return an all-zero state of shape ``(batch_size, state_size)``.

        The default dtype is ``tf.float32`` so that this snippet does not
        depend on a NumPy import, and the ``dtype`` argument is now honoured
        (``None`` falls back to ``tf.float32``).
        """
        state_dtype = tf.float32 if dtype is None else dtype
        return tf.zeros((batch_size, self.state_size), dtype=state_dtype)
class RecurrentModel(tf.keras.Model):
    """Model consisting of one stateful ``tf.keras.layers.RNN`` around ``RecurrentCell``.

    Args:
        **kwargs: Forwarded to ``tf.keras.Model.__init__``.
    """

    def __init__(self, **kwargs):
        # Forward the keyword arguments instead of silently dropping them.
        super(RecurrentModel, self).__init__(**kwargs)
        self.recurrent_layer = tf.keras.layers.RNN(
            RecurrentCell(units=5),
            return_sequences=True,
            stateful=True,
        )

    def call(self, inputs):
        """Run ``inputs`` through the recurrent layer and return its output."""
        return self.recurrent_layer(inputs)
class CustomCallback(tf.keras.callbacks.Callback):
    """Callback that prints the log keys available at the start of each training batch."""

    def on_train_batch_begin(self, batch, logs=None):
        """Print the batch index and the keys of ``logs``.

        Args:
            batch: Index of the batch that is about to start.
            logs: Optional dict of logged values; ``None`` is treated as empty
                so the default argument no longer raises AttributeError.
        """
        keys = list(logs or {})
        print("...Training: start of batch {}; got log keys: {}".format(batch, keys))
# Build and compile the stateful recurrent model.
model = RecurrentModel()
model.compile(optimizer="adam", loss="mse")

# Random inputs and targets of shape (batch_size, time_index, features).
batch_size = 2
time_index = 10
features = 5
inputs = tf.random.uniform((batch_size, time_index, features))
outputs = tf.random.uniform((batch_size, time_index, features))

# Fit without shuffling; the batch size equals the number of samples (2).
model.fit(
    x=inputs,
    y=outputs,
    batch_size=batch_size,
    shuffle=False,
    epochs=10,
    callbacks=[CustomCallback()],
)
In particular, documentation seems to be also inconsistent with Sequential model (haven't tried the functional API).
Thanks a lot!
matias
I am using Tensorflow 2.1 to create custom models and custom training loops. My aim is to compare the accuracy of different configurations of my neural network. Specifically, in this case, I am comparing the reconstruction error of an AutoEncoder with varying latent dimension. Hence, I am training my network for one latent dimension then computing the test error and then I redo this process for another latent dimension, and so on. With this process I want to create plots like this:
Plot example:
To speed up the training I want to use the @tf.function decorator for the backpropagation part of my training loop. However, when I try to train several different networks, looping over the latent dimension, I get an error. See below:
ValueError: in converted code:
<ipython-input-19-78bafad21717>:41 grad *
loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-19-78bafad21717>:33 call *
x_enc = self.encoder(inp)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-19-78bafad21717>:9 call *
x = self.dense1(inp)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:748 __call__
self._maybe_build(inputs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:2116 _maybe_build
self.build(input_shapes)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/layers/core.py:1113 build
trainable=True)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:446 add_weight
caching_device=caching_device)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/training/tracking/base.py:744 _add_variable_with_custom_getter
**kwargs_for_getter)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer_utils.py:142 make_variable
shape=variable_shape if variable_shape else None)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:258 __call__
return cls._variable_v1_call(*args, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:219 _variable_v1_call
shape=shape)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:65 getter
return captured_getter(captured_previous, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/eager/def_function.py:502 invalid_creator_scope
"tf.function-decorated function tried to create "
ValueError: tf.function-decorated function tried to create variables on non-first call.
I do not get this error when I remove the @tf.function decorator. I believe it has something to do with TensorFlow creating a computational graph when I use the decorator, and this graph remaining when I create another instance of my network, thus sparking an error since the old graph does not match the new instance of the network. But I am not sure about this at all, since I believe I am missing something fundamental about TensorFlow here!
Below is a very simple version of my code recreating the error. I have tried to remove all the unnecessary parts of the code to make it easier to read and debug. Furthermore, I am generating a very simple training and test set just for the sake of this question.
I have already tried the tf.keras.backend.clear_session() function without any luck.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Encoder
class build_encoder(tf.keras.Model):
    """Encoder: a 32-unit ReLU dense layer followed by a ``latent_dim``-unit ReLU dense layer.

    Args:
        latent_dim: Number of units of the second dense layer.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True
        )
        self.dense2 = tf.keras.layers.Dense(
            latent_dim, activation='relu', use_bias=True
        )

    def call(self, inp):
        """Apply ``dense1`` then ``dense2`` to ``inp``."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
# Decoder
class build_decoder(tf.keras.Model):
    """Decoder: a 32-unit ReLU dense layer followed by a 10-unit ReLU dense layer."""

    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True
        )
        self.dense2 = tf.keras.layers.Dense(
            10, activation='relu', use_bias=True
        )

    def call(self, inp):
        """Apply ``dense1`` then ``dense2`` to ``inp``."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
# Full Autoencoder
class Autoencoder(tf.keras.Model):
    """Autoencoder that chains ``build_encoder`` and ``build_decoder``.

    Args:
        latent_dim: Passed to ``build_encoder``; defaults to 5.
    """

    def __init__(self, latent_dim=5):
        super().__init__()
        self.encoder = build_encoder(latent_dim)
        self.decoder = build_decoder()

    def call(self, inp):
        """Encode ``inp`` and return the decoded result."""
        return self.decoder(self.encoder(inp))
#### Here is the backpropagation with the @tf.function decorator ####
# NOTE(review): the decorator line reads `#tf.function`, i.e. it is a plain
# comment, so this function runs eagerly as written. The surrounding text
# suggests it was originally `@tf.function` -- TODO confirm.
def grad(model, inputs):
    """Compute the reconstruction loss of ``model`` on ``inputs`` and its gradients.

    Returns:
        A pair ``(loss_value, gradients)`` where ``loss_value`` is the result of
        ``tf.losses.mean_squared_error(inputs, model(inputs))`` and
        ``gradients`` are taken with respect to ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        reconstruction = model(inputs)
        loss_value = tf.losses.mean_squared_error(inputs, reconstruction)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
# Training loop function
def train(x_train, model, num_epochs, batch_size, optimizer):
    """Train ``model`` on ``x_train`` with mini-batches and return the per-epoch loss.

    Args:
        x_train: Training samples; sliced along the first axis into batches.
        model: Model to train; called on the data and queried for
            ``trainable_variables``.
        num_epochs: Number of passes over ``x_train``.
        batch_size: Number of samples per gradient step.
        optimizer: Object whose ``apply_gradients`` is called once per batch.

    Returns:
        List with one entry per epoch: the mean squared reconstruction error
        on the full ``x_train`` after that epoch.
    """
    train_loss = []
    for epoch in range(num_epochs):
        # tf.random.shuffle returns a shuffled copy instead of shuffling in
        # place, so the result must be kept and used for batching.
        shuffled = tf.random.shuffle(x_train)
        for start in range(0, len(x_train), batch_size):
            x_inp = shuffled[start:start + batch_size]
            _, grads = grad(model, x_inp)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        epoch_loss = tf.reduce_mean(
            tf.losses.mean_squared_error(x_train, model(x_train))
        ).numpy()
        train_loss.append(epoch_loss)
        if epoch % 100 == 0:
            print("Epoch: {}, Train loss: {:.9f}".format(epoch, epoch_loss))
    return train_loss
#### Generating simple training and test data
# Synthetic data: uniform samples in [0, 1) with 10 features, the last four
# of which are zeroed out.
num_train = 10000
num_test = 1000
x_train = s = np.random.uniform(0, 1, (num_train, 10)).astype(np.float32)
x_train[:, 6:10] = 0
x_test = s = np.random.uniform(0, 1, (num_test, 10)).astype(np.float32)
x_test[:, 6:10] = 0
###
batch_size = 8
num_epochs = 10000
test_loss = []
# Looping over the latent dimensions
for latent_dim in range(1, 10):
    # Use the loop variable: the comparison is meaningless if every model is
    # built with the same hard-coded latent dimension.
    model = Autoencoder(latent_dim=latent_dim)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
    train_loss = train(
        x_train,
        model=model,
        num_epochs=num_epochs,
        batch_size=batch_size,
        optimizer=optimizer,
    )
    test_loss.append(
        tf.reduce_mean(
            tf.losses.mean_squared_error(x_test, model(x_test))
        ).numpy()
    )
plt.figure()
# Plot against the latent dimension itself rather than the list index.
plt.plot(range(1, 10), test_loss, linewidth=1.5)
plt.grid(True)
plt.show()
There's an error in the code snippet you provided.
I changed last Dense layer unit from 6 to 10.
# Decoder
class build_decoder(tf.keras.Model):
    """Decoder: a 32-unit ReLU dense layer followed by a 10-unit ReLU dense layer."""

    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True
        )
        self.dense2 = tf.keras.layers.Dense(
            10, activation='relu', use_bias=True
        )

    def call(self, inp):
        """Apply ``dense1`` then ``dense2`` to ``inp``."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
As for your question on training multiple model.
The error message "ValueError: tf.function-decorated function tried to create variables on non-first call" means that the function decorated with @tf.function is creating a new variable on a later call. This is not allowed, because the function is turned into a graph.
I have modified your back propagation method, I commented out your original code to observe the difference.
#### Here is the backpropagation with #tf.function decorator ####
# #tf.function
# def grad(model, inputs):
# with tf.GradientTape() as tape:
# loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
# return loss_value, tape.gradient(loss_value, model.trainable_variables)
# The decorator had been reduced to the comment `#tf.function`; it is restored
# so that the loss computation is actually compiled into a graph.
@tf.function
def MSE(y_true, y_pred):
    """Return ``tf.keras.losses.MSE(y_true, y_pred)``.

    Only this function is decorated: it receives the model output as an
    argument and does not call the model itself.
    """
    return tf.keras.losses.MSE(y_true, y_pred)
def backprop(inputs, model):
    """Return ``MSE(inputs, model(inputs))`` and its gradients.

    Gradients are taken with respect to ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        reconstruction = model(inputs)
        loss_value = MSE(inputs, reconstruction)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
def gradient_func(model, inputs):
    """Call ``backprop`` with the arguments swapped into its ``(inputs, model)`` order."""
    result = backprop(inputs, model)
    return result
The main culprit in your original code was calling model(inputs) as an input to the loss function inside the decorated function. When you decorate a function with @tf.function, the decoration is inherited by all the functions called inside it; this means the loss function is optimized.
Also, a way to train multiple models without overwriting a single variable is to put them into an array.
# Index 0 holds a placeholder so that model_array[latent_dim] is the model
# built for that latent dimension.
model_array = [0]
# Looping over the latent dimensions
for latent_dim in range(1, 10):
    # Creating an instance of my Autoencoder
    model_array.append(Autoencoder(latent_dim))
    # Defining an optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
    # Training the network (the model just appended is the last element)
    train_loss = train(
        x_train,
        model=model_array[-1],
        num_epochs=num_epochs,
        batch_size=batch_size,
        optimizer=optimizer,
    )
    reconstruction_error = tf.losses.mean_squared_error(
        x_test, model_array[-1](x_test)
    )
    test_loss.append(tf.reduce_mean(reconstruction_error).numpy())
This arranges the models into an array, which makes them easier to access and debug.
Here is the complete modified code.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Encoder
class build_encoder(tf.keras.Model):
    """Encoder: a 32-unit ReLU dense layer followed by a ``latent_dim``-unit ReLU dense layer.

    Args:
        latent_dim: Number of units of the second dense layer.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True
        )
        self.dense2 = tf.keras.layers.Dense(
            latent_dim, activation='relu', use_bias=True
        )

    def call(self, inp):
        """Apply ``dense1`` then ``dense2`` to ``inp``."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
# Decoder
class build_decoder(tf.keras.Model):
    """Decoder: a 32-unit ReLU dense layer followed by a 10-unit ReLU dense layer."""

    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True
        )
        self.dense2 = tf.keras.layers.Dense(
            10, activation='relu', use_bias=True
        )

    def call(self, inp):
        """Apply ``dense1`` then ``dense2`` to ``inp``."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
# Full Autoencoder
class Autoencoder(tf.keras.Model):
    """Autoencoder that chains ``build_encoder`` and ``build_decoder``.

    Args:
        latent_dim: Passed to ``build_encoder``; defaults to 5.
    """

    def __init__(self, latent_dim=5):
        super().__init__()
        self.encoder = build_encoder(latent_dim)
        self.decoder = build_decoder()

    def call(self, inp):
        """Encode ``inp`` and return the decoded result."""
        return self.decoder(self.encoder(inp))
#### Here is the backpropagation with #tf.function decorator ####
# #tf.function
# def grad(model, inputs):
# with tf.GradientTape() as tape:
# loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
# return loss_value, tape.gradient(loss_value, model.trainable_variables)
# The decorator had been reduced to the comment `#tf.function`; it is restored
# so that the loss computation is actually compiled into a graph.
@tf.function
def MSE(y_true, y_pred):
    """Return ``tf.keras.losses.MSE(y_true, y_pred)``.

    Only this function is decorated: it receives the model output as an
    argument and does not call the model itself.
    """
    return tf.keras.losses.MSE(y_true, y_pred)
def backprop(inputs, model):
    """Return ``MSE(inputs, model(inputs))`` and its gradients.

    Gradients are taken with respect to ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        reconstruction = model(inputs)
        loss_value = MSE(inputs, reconstruction)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
def gradient_func(model, inputs):
    """Call ``backprop`` with the arguments swapped into its ``(inputs, model)`` order."""
    result = backprop(inputs, model)
    return result
# Training loop function
def train(x_train, model, num_epochs, batch_size, optimizer):
    """Train ``model`` on ``x_train`` with mini-batches and return the per-epoch loss.

    Args:
        x_train: Training samples; sliced along the first axis into batches.
        model: Model to train; called on the data and queried for
            ``trainable_variables``.
        num_epochs: Number of passes over ``x_train``.
        batch_size: Number of samples per gradient step.
        optimizer: Object whose ``apply_gradients`` is called once per batch.

    Returns:
        List with one entry per epoch: the mean squared reconstruction error
        on the full ``x_train`` after that epoch.
    """
    train_loss = []
    for epoch in range(num_epochs):
        # tf.random.shuffle returns a shuffled copy instead of shuffling in
        # place, so the result must be kept and used for batching.
        shuffled = tf.random.shuffle(x_train)
        for start in range(0, len(x_train), batch_size):
            x_inp = shuffled[start:start + batch_size]
            _, grads = gradient_func(model, x_inp)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        epoch_loss = tf.reduce_mean(
            tf.losses.mean_squared_error(x_train, model(x_train))
        ).numpy()
        train_loss.append(epoch_loss)
        if epoch % 100 == 0:
            print("Epoch: {}, Train loss: {:.9f}".format(epoch, epoch_loss))
    return train_loss
#### Generating simple training and test data
# Synthetic data: uniform samples in [0, 1) with 10 features, the last four
# of which are zeroed out.
num_train = 10000
num_test = 1000
x_train = s = np.random.uniform(0, 1, (num_train, 10)).astype(np.float32)
x_train[:, 6:10] = 0
x_test = s = np.random.uniform(0, 1, (num_test, 10)).astype(np.float32)
x_test[:, 6:10] = 0
###
batch_size = 8
num_epochs = 10000
test_loss = []
# Index 0 holds a placeholder so that model_array[latent_dim] is the model
# built for that latent dimension.
model_array = [0]
# Looping over the latent dimensions
for latent_dim in range(1, 10):
    # Creating an instance of my Autoencoder
    model_array.append(Autoencoder(latent_dim))
    # Defining an optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
    # Training the network (the model just appended is the last element)
    train_loss = train(
        x_train,
        model=model_array[-1],
        num_epochs=num_epochs,
        batch_size=batch_size,
        optimizer=optimizer,
    )
    reconstruction_error = tf.losses.mean_squared_error(
        x_test, model_array[-1](x_test)
    )
    test_loss.append(tf.reduce_mean(reconstruction_error).numpy())
# Test error as a function of the latent dimension.
plt.figure()
plt.plot(range(1, 10), test_loss, linewidth=1.5)
plt.grid(True)
plt.show()
There is also a brief discussion about @tf.function and AutoGraph in the TF documentation at this link.
Feel free to ask questions and hope this helps you.
For example, this is trivial, but is there a layer for this? It is not really a convolution ... there is one "Dense layer" (set of weights) per data point.
In [266]: X = np.random.randn(10, 3); W = np.random.randn(10, 3, 4); (X[:, :, None] * W).sum(axis=1).shape
Out[266]: (10, 4)
Create your own layer:
Warning: works only with fixed batch size, you need to define batch_shape or batch_input_shape in your models!!!!
class SampleDense(Layer):
    """Dense-like layer that holds a separate weight matrix for every sample.

    The kernel shape is ``input_shape + (units,)``, so it includes the batch
    dimension; the layer therefore requires a fully defined input shape.

    Args:
        units: Size of the last output dimension.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, units, **kwargs):
        self.units = units
        super(SampleDense, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the kernel of shape ``input_shape + (units,)``.

        Raises:
            ValueError: If any dimension of ``input_shape`` is undefined.
        """
        # Normalise to a plain tuple so the concatenation below works whether
        # the framework passes a tuple or a shape object.
        input_shape = tuple(input_shape)
        if any(getattr(dim, 'value', dim) is None for dim in input_shape):
            raise ValueError(
                'SampleDense needs a fully defined input shape (including '
                'the batch size), got {}. Define batch_shape or '
                'batch_input_shape in your model.'.format(input_shape)
            )
        weight_shape = input_shape + (self.units,)
        self.kernel = self.add_weight(name='kernel',
                                      shape=weight_shape,
                                      initializer='uniform',
                                      trainable=True)
        self.built = True

    def call(self, inputs):
        """Multiply each input by its own kernel and sum over the input feature axis."""
        # Append an axis so the inputs broadcast against the trailing units axis.
        inputs = K.expand_dims(inputs, axis=-1)
        outputs = inputs * self.kernel
        outputs = K.sum(outputs, axis=-2)
        return outputs

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last dimension replaced by ``units``."""
        return tuple(input_shape[:-1]) + (self.units,)
I'm trying to implement my own layer on Keras with a TensorFlow backend.
Is there any way to print the value inside tensors while training?
For example, I want to print x and self.kernel in the following code:
class MyLayer(Layer):
    """Layer that multiplies its input by a trainable kernel via ``K.dot``.

    Args:
        output_dim: Size of the second kernel dimension and of the output's
            second dimension.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``(input_shape[1], output_dim)`` kernel."""
        kernel_shape = (input_shape[1], self.output_dim)
        # Trainable weight variable for this layer.
        self.kernel = self.add_weight(
            name='kernel',
            shape=kernel_shape,
            initializer='uniform',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``K.dot(x, kernel)``."""
        # This is where the values of x and self.kernel should be printed.
        projected = K.dot(x, self.kernel)
        return projected

    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], output_dim)``."""
        leading_dim = input_shape[0]
        return (leading_dim, self.output_dim)
You can use keras.backend.print_tensor, which is just an identity transform that has the side-effect of printing the value of the tensor, and optionally a message. For example:
import keras.backend as K
def call(self, x):
    """Return ``K.dot(x, kernel)`` with both operands routed through ``K.print_tensor``."""
    # x is wrapped first, then the kernel, matching the argument order of K.dot.
    x_logged = K.print_tensor(x, message='Value of x')
    kernel_logged = K.print_tensor(self.kernel, message='Value of kernel')
    return K.dot(x_logged, kernel_logged)
See https://keras.io/backend/#print_tensor for more information.
You can also use tf.print when using the TensorFlow backend.
def call(self, x):
    """Print ``x`` and the kernel with ``tf.print``, then return ``K.dot(x, kernel)``."""
    for tensor in (x, self.kernel):
        tf.print(tensor)
    return K.dot(x, self.kernel)