PyTorch's forward function counterpart in TensorFlow

What is the counterpart in TensorFlow to PyTorch's forward function?
I am trying to translate some PyTorch code to TensorFlow.

The forward method of nn.Module in PyTorch corresponds to the __call__() method of tf.Module in TensorFlow, or the call() method of tf.keras.layers.Layer in Keras. Here is an example of a simple dense layer in both TensorFlow and Keras:
Tensorflow:
class Dense(tf.Module):
    def __init__(self, input_dim, output_size, name=None):
        super().__init__(name=name)
        self.w = tf.Variable(tf.random.normal([input_dim, output_size]), name='w')
        self.b = tf.Variable(tf.zeros([output_size]), name='b')

    def __call__(self, x):
        y = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(y)
Keras:
class SimpleDense(tf.keras.layers.Layer):
    def __init__(self, units=32):
        super(SimpleDense, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b
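In both cases the layer is used by calling the instance directly: the tf.Module version goes straight to __call__, while the Keras base __call__ builds the weights on first use and then dispatches to call(). A small usage sketch (the shapes are purely illustrative):
import tensorflow as tf

x = tf.random.uniform((8, 16))                   # batch of 8 samples with 16 features

dense_tf = Dense(input_dim=16, output_size=32)   # tf.Module version above
y1 = dense_tf(x)                                 # invokes __call__ directly

dense_keras = SimpleDense(units=32)              # Keras Layer version above
y2 = dense_keras(x)                              # base __call__ builds w and b, then runs call()

print(y1.shape, y2.shape)                        # (8, 32) (8, 32)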
You can check the following links for more details:
https://www.tensorflow.org/api_docs/python/tf/Module
https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer

Related

How to train parameters of 2 different classes together?

How to train the parameters of Class1 and Class2 together? That is, the weights of self.linear1 and self.linear2 from Class1 along with the weight of Class2. Since Class1 calls Class2 as self.conv1 = Class2(w_in, w_out), they are interlinked and form a chain during the forward pass; that's why I want to train them together. What should I write in my training loop when calculating the gradients? grads = tape.gradient(loss, ? )
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class Class1(layers.Layer):
    def __init__(self, num_channels, w_in, w_out, num_class):
        super(Class1, self).__init__()
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        self.num_class = num_class
        self.conv1 = Class2(w_in, w_out)
        self.linear1 = tf.keras.layers.Dense(self.w_out, input_shape=(self.w_out * self.num_channels,), activation=None)
        self.linear2 = tf.keras.layers.Dense(self.num_class, input_shape=(self.w_out,), activation=None)

    def call(self, A):
        a = self.conv1(A)
        return a

class Class2(tf.keras.layers.Layer):
    def __init__(self, in_channels, out_channels):
        super(Class2, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.weight = self.add_weight(
            shape=(out_channels, in_channels, 1, 1), initializer="random_normal", trainable=True)

    def call(self, A):
        print(A)
        A = tf.reduce_sum(A * tf.nn.softmax(self.weight, 1), 1)
        print(A)
        return A
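Since Class2 is assigned as an attribute of Class1 (self.conv1), Keras tracks its weight as part of the outer layer's trainable_variables, so one common answer to the question mark above is to pass model.trainable_variables to tape.gradient. A minimal sketch, under assumed illustrative shapes and a placeholder loss (none of these values come from the question):
# Hypothetical values, chosen only so the shapes broadcast in Class2.call.
num_channels, w_in, w_out, num_class = 3, 4, 8, 2
model = Class1(num_channels, w_in, w_out, num_class)
optimizer = tf.keras.optimizers.Adam()

A = tf.random.uniform((w_in, 5, 5))              # placeholder input
with tf.GradientTape() as tape:
    out = model(A)
    loss = tf.reduce_mean(tf.square(out))        # placeholder loss
# trainable_variables of the outer layer already contains the nested
# Class2 weight; the Dense layers are included once they have been built.
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))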

Custom RNN stateful model BATCH SIZE training

I want to train a custom, stateful RNN, but I am failing to do so over batches. I followed the documentation of tf.keras.layers.RNN, but it still doesn't work.
Stand-alone code:
import tensorflow as tf
import numpy as np

class RecurrentCell(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        self.output_size = 5
        super(RecurrentCell, self).__init__(**kwargs)

    def build(self, input_shape):
        self.training_params = self.add_weight(shape=(1, self.output_size),
                                               initializer='uniform',
                                               name='kernel')
        self.built = True

    def call(self, inputs, states):
        new_states = states
        output = inputs + self.training_params
        return output, [new_states]

    def get_initial_state(self, inputs=None, batch_size=10, dtype=np.float32):
        return tf.zeros(tuple([batch_size]) + tuple([self.state_size]), dtype=np.float32)

class RecurrentModel(tf.keras.Model):
    def __init__(self, **kwargs):
        super(RecurrentModel, self).__init__()
        self.recurrent_layer = tf.keras.layers.RNN(RecurrentCell(units=5),
                                                   return_sequences=True,
                                                   stateful=True)

    def call(self, inputs):
        return self.recurrent_layer(inputs)

class CustomCallback(tf.keras.callbacks.Callback):
    def on_train_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        print("...Training: start of batch {}; got log keys: {}".format(batch, keys))

model = RecurrentModel()
model.compile(optimizer="adam", loss="mse")

batch_size = 2
time_index = 10
features = 5

inputs = tf.random.uniform((batch_size, time_index, features))
outputs = tf.random.uniform((batch_size, time_index, features))

model.fit(x=inputs, y=outputs, batch_size=2, shuffle=False, epochs=10, callbacks=[CustomCallback()])
In particular, the documentation also seems to be inconsistent with the Sequential model (I haven't tried the functional API).
Thanks a lot!
matias
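One detail that commonly trips up stateful RNNs in Keras: the layer needs a fixed, known batch size, because its state variables are created per batch entry, and get_initial_state should rely on the batch_size the RNN layer passes in rather than a hard-coded default. A minimal sketch of that idea (an assumption to verify against your TF version, not a confirmed fix for the problem above):
import tensorflow as tf

batch_size, time_index, features = 2, 10, 5

model = RecurrentModel()
# Calling the model once on a correctly shaped batch builds the stateful RNN
# with a concrete batch size, so its state variables can be created.
_ = model(tf.zeros((batch_size, time_index, features)))
model.compile(optimizer="adam", loss="mse")

inputs = tf.random.uniform((batch_size, time_index, features))
outputs = tf.random.uniform((batch_size, time_index, features))
model.fit(x=inputs, y=outputs, batch_size=batch_size, shuffle=False, epochs=10)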

Training multiple models defined from the same class in Tensorflow 2.0 fails when using @tf.function

I am using Tensorflow 2.1 to create custom models and custom training loops. My aim is to compare the accuracy of different configurations of my neural network. Specifically, in this case, I am comparing the reconstruction error of an AutoEncoder with varying latent dimension. Hence, I am training my network for one latent dimension then computing the test error and then I redo this process for another latent dimension, and so on. With this process I want to create plots like this:
[Plot example: test reconstruction error vs. latent dimension]
To speed up the training I want to use the @tf.function decorator for the backpropagation part of my training loop. However, when I try to train several different networks, looping over the latent dimension, I get an error. See below:
ValueError: in converted code:
<ipython-input-19-78bafad21717>:41 grad *
loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-19-78bafad21717>:33 call *
x_enc = self.encoder(inp)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-19-78bafad21717>:9 call *
x = self.dense1(inp)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:748 __call__
self._maybe_build(inputs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:2116 _maybe_build
self.build(input_shapes)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/layers/core.py:1113 build
trainable=True)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:446 add_weight
caching_device=caching_device)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/training/tracking/base.py:744 _add_variable_with_custom_getter
**kwargs_for_getter)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer_utils.py:142 make_variable
shape=variable_shape if variable_shape else None)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:258 __call__
return cls._variable_v1_call(*args, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:219 _variable_v1_call
shape=shape)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:65 getter
return captured_getter(captured_previous, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/eager/def_function.py:502 invalid_creator_scope
"tf.function-decorated function tried to create "
ValueError: tf.function-decorated function tried to create variables on non-first call.
I do not get this error when I remove the @tf.function decorator. I believe it has something to do with Tensorflow creating a computational graph when I use the decorator, and this graph remaining when I create another instance of my network, which raises an error because the old graph does not match the new instance of the network. But I am not sure about this at all, since I believe I am missing something fundamental about Tensorflow here!
Below is a very simple version of my code recreating the error. I have tried to remove all the unnecessary parts of the code to make it easier to read and debug. Furthermore, I am generating a very simple training and test set just for the sake of this question.
I have already tried the tf.keras.backend.clear_session() function without any luck.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Encoder
class build_encoder(tf.keras.Model):
    def __init__(self, latent_dim):
        super(build_encoder, self).__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(latent_dim, activation='relu', use_bias=True)

    def call(self, inp):
        x = self.dense1(inp)
        x = self.dense2(x)
        return x

# Decoder
class build_decoder(tf.keras.Model):
    def __init__(self):
        super(build_decoder, self).__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(10, activation='relu', use_bias=True)

    def call(self, inp):
        x = self.dense1(inp)
        x = self.dense2(x)
        return x

# Full Autoencoder
class Autoencoder(tf.keras.Model):
    def __init__(self, latent_dim=5):
        super(Autoencoder, self).__init__()
        self.encoder = build_encoder(latent_dim)
        self.decoder = build_decoder()

    def call(self, inp):
        x_enc = self.encoder(inp)
        x_dec = self.decoder(x_enc)
        return x_dec

#### Here is the backpropagation with @tf.function decorator ####
@tf.function
def grad(model, inputs):
    with tf.GradientTape() as tape:
        loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
    return loss_value, tape.gradient(loss_value, model.trainable_variables)

# Training loop function
def train(x_train, model, num_epochs, batch_size, optimizer):
    train_loss = []
    for epoch in range(num_epochs):
        tf.random.shuffle(x_train)
        for i in range(0, len(x_train), batch_size):
            x_inp = x_train[i: i + batch_size]
            loss_value, grads = grad(model, x_inp)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_train, model(x_train))).numpy())
        if epoch % 100 == 0:
            print("Epoch: {}, Train loss: {:.9f}".format(epoch, train_loss[epoch]))
    return train_loss

#### Generating simple training and test data
num_train = 10000
num_test = 1000

x_train = s = np.random.uniform(0, 1, (num_train, 10)).astype(np.float32)
x_train[:, 6:10] = 0

x_test = s = np.random.uniform(0, 1, (num_test, 10)).astype(np.float32)
x_test[:, 6:10] = 0
###

batch_size = 8
num_epochs = 10000
test_loss = []

# Looping over the latent dimensions
for latent_dim in range(1, 10):
    model = Autoencoder(latent_dim=3)  # Creating an instance of my Autoencoder
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)  # Defining an optimizer
    train_loss = train(x_train, model=model, num_epochs=num_epochs, batch_size=batch_size, optimizer=optimizer)  # Training the network
    test_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_test, model(x_test))).numpy())

plt.figure()
plt.plot(test_loss, linewidth=1.5)
plt.grid(True)
plt.show()
There's an error in the code snippet you provided: I changed the last Dense layer's units from 6 to 10, so that the decoder output matches the 10-dimensional input.
# Decoder
class build_decoder(tf.keras.Model):
    def __init__(self):
        super(build_decoder, self).__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(10, activation='relu', use_bias=True)

    def call(self, inp):
        x = self.dense1(inp)
        x = self.dense2(x)
        return x
As for your question on training multiple models:
The error message "ValueError: tf.function-decorated function tried to create variables on non-first call" means that the function decorated with @tf.function is creating new variables on a later call. That is not allowed, because the function has been turned into a graph.
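For intuition, here is a minimal, hypothetical sketch of how this can happen (not taken from the question): passing a second, not-yet-built model to the same @tf.function forces a retrace in which new variables get created.
import tensorflow as tf

@tf.function
def loss_of(model, x):
    # MSE between the model's reconstruction and its input
    return tf.reduce_mean(tf.square(model(x) - x))

x = tf.random.uniform((4, 10))
m1 = tf.keras.Sequential([tf.keras.layers.Dense(10)])
m2 = tf.keras.Sequential([tf.keras.layers.Dense(10)])

loss_of(m1, x)  # first call: m1's weights are created while tracing, which is allowed
loss_of(m2, x)  # retrace with the unbuilt m2 creates new variables -> ValueError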
I have modified your backpropagation method; I commented out your original code so you can observe the difference.
#### Here is the backpropagation with @tf.function decorator ####
# @tf.function
# def grad(model, inputs):
#     with tf.GradientTape() as tape:
#         loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
#     return loss_value, tape.gradient(loss_value, model.trainable_variables)

@tf.function
def MSE(y_true, y_pred):
    return tf.keras.losses.MSE(y_true, y_pred)

def backprop(inputs, model):
    with tf.GradientTape() as tape:
        loss_value = MSE(inputs, model(inputs))
    return loss_value, tape.gradient(loss_value, model.trainable_variables)

def gradient_func(model, inputs):
    return backprop(inputs, model)
The main culprit in your original code was passing model(inputs) as an input to the loss function: when you decorate a function with @tf.function, the decoration is inherited by every function called inside it. In the modified version, only the loss function (MSE) is compiled.
Also, a way to train multiple models without overwriting a single variable is to put them into an array.
model_array = [0]

# Looping over the latent dimensions
for latent_dim in range(1, 10):
    model_array.append(Autoencoder(latent_dim))  # Creating an instance of my Autoencoder
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)  # Defining an optimizer
    train_loss = train(x_train, model=model_array[latent_dim], num_epochs=num_epochs, batch_size=batch_size, optimizer=optimizer)  # Training the network
    test_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_test, model_array[latent_dim](x_test))).numpy())
This arranges the models into an array, which makes them easier to access and debug.
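An alternative that keeps the @tf.function speed-up on the whole gradient step is to create a fresh tf.function for each model instance inside the loop, so that each compiled graph only ever sees one set of variables. A sketch of that idea (the helper name is illustrative):
def make_grad_fn():
    # Each returned tf.function creates variables only on its own first call,
    # because it is paired with exactly one model.
    @tf.function
    def grad(model, inputs):
        with tf.GradientTape() as tape:
            loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
        return loss_value, tape.gradient(loss_value, model.trainable_variables)
    return grad

for latent_dim in range(1, 10):
    model = Autoencoder(latent_dim)
    grad_fn = make_grad_fn()  # new compiled gradient function for this model
    # ... use grad_fn(model, x_batch) inside the training loop ...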
Here is the complete modified code.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Encoder
class build_encoder(tf.keras.Model):
    def __init__(self, latent_dim):
        super(build_encoder, self).__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(latent_dim, activation='relu', use_bias=True)

    def call(self, inp):
        x = self.dense1(inp)
        x = self.dense2(x)
        return x

# Decoder
class build_decoder(tf.keras.Model):
    def __init__(self):
        super(build_decoder, self).__init__()
        self.dense1 = tf.keras.layers.Dense(32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(10, activation='relu', use_bias=True)

    def call(self, inp):
        x = self.dense1(inp)
        x = self.dense2(x)
        return x

# Full Autoencoder
class Autoencoder(tf.keras.Model):
    def __init__(self, latent_dim=5):
        super(Autoencoder, self).__init__()
        self.encoder = build_encoder(latent_dim)
        self.decoder = build_decoder()

    def call(self, inp):
        x_enc = self.encoder(inp)
        x_dec = self.decoder(x_enc)
        return x_dec

#### Here is the backpropagation with @tf.function decorator ####
# @tf.function
# def grad(model, inputs):
#     with tf.GradientTape() as tape:
#         loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
#     return loss_value, tape.gradient(loss_value, model.trainable_variables)

@tf.function
def MSE(y_true, y_pred):
    return tf.keras.losses.MSE(y_true, y_pred)

def backprop(inputs, model):
    with tf.GradientTape() as tape:
        loss_value = MSE(inputs, model(inputs))
    return loss_value, tape.gradient(loss_value, model.trainable_variables)

def gradient_func(model, inputs):
    return backprop(inputs, model)

# Training loop function
def train(x_train, model, num_epochs, batch_size, optimizer):
    train_loss = []
    for epoch in range(num_epochs):
        tf.random.shuffle(x_train)
        for i in range(0, len(x_train), batch_size):
            x_inp = x_train[i: i + batch_size]
            loss_value, grads = gradient_func(model, x_inp)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_train, model(x_train))).numpy())
        if epoch % 100 == 0:
            print("Epoch: {}, Train loss: {:.9f}".format(epoch, train_loss[epoch]))
    return train_loss

#### Generating simple training and test data
num_train = 10000
num_test = 1000

x_train = s = np.random.uniform(0, 1, (num_train, 10)).astype(np.float32)
x_train[:, 6:10] = 0

x_test = s = np.random.uniform(0, 1, (num_test, 10)).astype(np.float32)
x_test[:, 6:10] = 0
###

batch_size = 8
num_epochs = 10000
test_loss = []
model_array = [0]

# Looping over the latent dimensions
for latent_dim in range(1, 10):
    model_array.append(Autoencoder(latent_dim))  # Creating an instance of my Autoencoder
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)  # Defining an optimizer
    train_loss = train(x_train, model=model_array[latent_dim], num_epochs=num_epochs, batch_size=batch_size, optimizer=optimizer)  # Training the network
    test_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_test, model_array[latent_dim](x_test))).numpy())

plt.figure()
plt.plot(range(1, 10), test_loss, linewidth=1.5)
plt.grid(True)
plt.show()
There is also a brief discussion of @tf.function and AutoGraph in the TF documentation.
Feel free to ask questions and hope this helps you.

Is there a TensorFlow Keras layer that is a wrapper for a stack of Dense layers?

For example, this is trivial, but is there a layer for it? It is not really a convolution ... there is one "Dense layer" (weights) per data point.
In [266]: X = np.random.randn(10, 3); W = np.random.randn(10, 3, 4); (X[:, :, None] * W).sum(axis=1).shape
Out[266]: (10, 4)
Create your own layer:
Warning: this works only with a fixed batch size; you need to define batch_shape or batch_input_shape in your models!
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class SampleDense(Layer):
    def __init__(self, units, **kwargs):
        self.units = units
        super(SampleDense, self).__init__(**kwargs)

    def build(self, input_shape):
        weight_shape = input_shape + (self.units,)
        self.kernel = self.add_weight(name='kernel',
                                      shape=weight_shape,
                                      initializer='uniform',
                                      trainable=True)
        self.built = True

    def call(self, inputs):
        inputs = K.expand_dims(inputs, axis=-1)
        outputs = inputs * self.kernel
        outputs = K.sum(outputs, axis=-2)
        return outputs

    def compute_output_shape(self, input_shape):
        return input_shape[:-1] + (self.units,)
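A short usage sketch under the warning above, with the batch size fixed via batch_input_shape (the shapes mirror the numpy example in the question):
import numpy as np
import tensorflow as tf

# 10 data points, 3 features in, 4 units out: one weight matrix per data point,
# so the batch size (10) must be fixed via batch_input_shape.
model = tf.keras.Sequential([
    SampleDense(units=4, batch_input_shape=(10, 3)),
])
X = np.random.randn(10, 3).astype(np.float32)
print(model(X).shape)  # (10, 4), same as (X[:, :, None] * W).sum(axis=1).shape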

Is there any way to debug a value inside a tensor while training on Keras?

I'm trying to implement my own layer on Keras with a TensorFlow backend.
Is there any way to print the value inside tensors while training?
For example, I want to print x and self.kernel in the following code:
from keras import backend as K
from keras.layers import Layer

class MyLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)

    def call(self, x):
        # print x
        # print self.kernel
        return K.dot(x, self.kernel)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)
You can use keras.backend.print_tensor, which is just an identity transform that has the side-effect of printing the value of the tensor, and optionally a message. For example:
import keras.backend as K

def call(self, x):
    return K.dot(K.print_tensor(x, message='Value of x'),
                 K.print_tensor(self.kernel, message='Value of kernel'))
See https://keras.io/backend/#print_tensor for more information.
You can also use tf.print when using the TensorFlow backend.
import tensorflow as tf

def call(self, x):
    tf.print(x)
    tf.print(self.kernel)
    return K.dot(x, self.kernel)
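One practical note, as an assumption worth checking for your setup: tf.print writes to standard error by default, so during training the output may show up in the terminal or notebook server log rather than in the cell output. The output_stream argument lets you redirect it, for example:
import sys
import tensorflow as tf

def call(self, x):
    # Send the printed values to stdout so they appear in the cell output
    # (the default stream is sys.stderr).
    tf.print("x:", x, output_stream=sys.stdout)
    tf.print("kernel:", self.kernel, output_stream=sys.stdout)
    return K.dot(x, self.kernel)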