How does one use keras add_weight() vars with tensorflow probability distributions? - tensorflow

I am creating a new Keras layer which accepts a vector of input data and is parameterized by two scalars, a mean and a standard deviation. I model the input data as a normal distribution and estimate its mean and variance through gradient descent. However, when I initialize tfp.distributions.Normal(mu, sigma), where mu and sigma come from add_weight() during build(), the gradients do not propagate through mu and sigma.
The tensorflow probability documentation states that you can pass in training variables for distribution parameters and backprop through them. How do I get this to work inside of keras?
Below is a minimal working example.
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
tfk = tf.keras
tfkl = tf.keras.layers
tfd = tfp.distributions
tfpl = tfp.layers
EPS = 1e-5
batch_size = 4
N = 100
x = np.random.randn(batch_size, N)
class NormalLikelihood(tf.keras.layers.Layer):
    """Layer that evaluates a Normal density with learnable mean and std on its input."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the `mean` and `std` weights and the distribution built from them."""
        mean_init = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1)
        std_init = tf.keras.initializers.RandomUniform(minval=EPS, maxval=5.0, seed=None)
        self.mu = self.add_weight(
            "mean", shape=[1], initializer=mean_init, dtype=tf.float32)
        self.sigma = self.add_weight(
            "std", shape=[1], initializer=std_init,
            constraint=tf.keras.constraints.non_neg(), dtype=tf.float32)
        # The distribution is deliberately built from the indexed weights
        # (`[0]`), exactly as in the listing the question is about.
        loc = self.mu[0]
        scale = self.sigma[0]
        self.distribution = tfp.distributions.Normal(loc, scale)

    def call(self, input):
        """Return the density of `input`, clipped to [1e-3, 1 - 1e-3]."""
        density = self.distribution.prob(input)
        return tf.clip_by_value(density, 1e-3, 1-1e-3)
# Symbolic input with 100 features per sample.
input_layer = tf.keras.layers.Input(shape=(100,))
r = NormalLikelihood()(input_layer)
# Negated sum of the log of the (clipped) densities returned by the layer.
r = -tf.reduce_sum(tf.math.log(r))
model = tf.keras.models.Model(input_layer, r)
# The loss is attached with add_loss(), so compile() gets loss=None and
# fit() gets y=None.
model.add_loss(r)
model.compile(optimizer='rmsprop', loss=None)
model.fit(x, y=None)
This code results in builtins.ValueError: No gradients provided for any variable: ['normal_likelihood/mean:0', 'normal_likelihood/std:0'] which is not expected. Desired behavior would be that ['normal_likelihood/mean:0', 'normal_likelihood/std:0'] have gradients provided for them.
See the code in google colab: https://colab.research.google.com/drive/1_u4XTCIH-2qwNSgv9zkZiCG_zeCIEZGp?usp=sharing

Change tfp.distributions.Normal(self.mu[0], self.sigma[0]) to tfp.distributions.Normal(self.mu, self.sigma).
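The reason this works is that, under the hood of the Keras .fit() method, the gradient computation looks for the layer's trainable variables. Indexing with [0] inside build() evaluates the weights once and hands the distribution plain tensors, so from the point of view of the loss they are constants and the chain rule no longer connects the loss to the variables. Passing self.mu and self.sigma themselves keeps the distribution tied to the trainable variables.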
Example:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
EPS = 1e-5
class NormalLikelihoodYours(tf.keras.layers.Layer):
    """Variant from the question: the Normal is built from indexed weights."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the `mean` and `std` weights, then the distribution."""
        mean_initializer = tf.keras.initializers.RandomNormal(
            mean=0.0, stddev=1)
        std_initializer = tf.keras.initializers.RandomUniform(
            minval=EPS, maxval=5.0, seed=None)
        self.mu = self.add_weight(
            "mean", shape=[1], initializer=mean_initializer,
            dtype=tf.float32)
        self.sigma = self.add_weight(
            "std", shape=[1], initializer=std_initializer,
            constraint=tf.keras.constraints.non_neg(),
            dtype=tf.float32)
        # Kept on purpose: this variant passes the `[0]`-indexed weights to
        # the distribution, which is the case the example demonstrates.
        indexed_mean = self.mu[0]
        indexed_std = self.sigma[0]
        self.distribution = tfp.distributions.Normal(indexed_mean, indexed_std)

    def call(self, input):
        """Return the density of `input`, clipped to [1e-3, 1 - 1e-3]."""
        likelihood = self.distribution.prob(input)
        return tf.clip_by_value(likelihood, 1e-3, 1-1e-3)
class NormalLikelihoodMine(tf.keras.layers.Layer):
    """Variant from the answer: the Normal is built from the weights themselves."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the `mean` and `std` weights, then the distribution."""
        mean_initializer = tf.keras.initializers.RandomNormal(
            mean=0.0, stddev=1)
        std_initializer = tf.keras.initializers.RandomUniform(
            minval=EPS, maxval=5.0, seed=None)
        self.mu = self.add_weight(
            "mean", shape=[1], initializer=mean_initializer,
            dtype=tf.float32)
        self.sigma = self.add_weight(
            "std", shape=[1], initializer=std_initializer,
            constraint=tf.keras.constraints.non_neg(),
            dtype=tf.float32)
        # The weights are passed without indexing.
        self.distribution = tfp.distributions.Normal(self.mu, self.sigma)

    def call(self, input):
        """Return the density of `input`, clipped to [1e-3, 1 - 1e-3]."""
        likelihood = self.distribution.prob(input)
        return tf.clip_by_value(likelihood, 1e-3, 1-1e-3)
# loss function
def calc_loss(logits):
    """Return the negated sum of the element-wise natural log of `logits`."""
    log_values = tf.math.log(logits)
    total = tf.math.reduce_sum(log_values)
    return -total
# model input
input_layer = tf.keras.layers.Input(shape=(100,))
x_in = tf.random.normal([4, 100])

# your model
# (the stray line-continuation backslash that followed this statement in the
# original listing has been removed)
your_output = NormalLikelihoodYours()(input_layer)
your_model = tf.keras.models.Model(input_layer, your_output)

# my model
my_output = NormalLikelihoodMine()(input_layer)
my_model = tf.keras.models.Model(input_layer, my_output)

# yours has no gradients because the network weights are not
# included anywhere in the loss calculation. When you index them
# with `[0]` they go from being trainable variables in the network,
# to just constants.
with tf.GradientTape() as tape:
    y_hat = your_model(x_in)
    loss = calc_loss(y_hat)
print(tape.gradient(loss, your_model.trainable_variables))
# [None, None]

# my model has gradients because `loss` and the weights in
# `trainable_variables` are connected
with tf.GradientTape() as tape:
    y_hat = my_model(x_in)
    loss = calc_loss(y_hat)
print(tape.gradient(loss, my_model.trainable_variables))
# [<tf.Tensor: shape=(1,), numpy=array([43.83749], dtype=float32)>,
# <tf.Tensor: shape=(1,), numpy=array([-37.348656], dtype=float32)>]

Related

Why my chemical vae cannot learn any thing with toy dataset?

I'm trying to implement a mini version of the chemical VAE described in this paper: 10.1021/acscentsci.7b00572. The model trains without errors and the loss changes. However, the predicted properties of all samples are the same, close to the mean value, and the autoencoder cannot reconstruct the input data. In other words, the model does not learn anything from training. I have carefully checked my code but failed to find anything wrong. Can anyone help? Thank you.
Here is my code:
import numpy as np
import tensorflow as tf
# example smiles and properties
# The chirality markers are written with '@' / '@@'; the '#' characters in the
# original listing were an encoding artefact ('C[C##H]' is not valid SMILES).
smiles = ['CCCCO', 'C1CCCCC1', 'C[C@@H](C(=O)O)N', 'C[C@H](C(=O)O)N', 'CC(=O)O'] * 200
y = [1,2,3,4,5] * 200
# smiles to one-hot
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Vocabulary: every distinct character, plus one extra index
# (index 0 is what pad_sequences fills with).
dicts = set(''.join(smiles))
num_words = len(dicts) + 1
max_lens = 15
tokenizer = Tokenizer(num_words=num_words, char_level=True)
tokenizer.fit_on_texts(smiles)
sequences = tokenizer.texts_to_sequences(smiles)
# Pad / truncate every sequence at the end to exactly max_lens tokens.
sequences = pad_sequences(sequences, maxlen = max_lens, padding='post', truncating='post')
x = to_categorical(sequences, num_classes=num_words)
# model
from tensorflow.keras import layers, Model
class VAEWithRegressor(Model):
    """Combines a variational autoencoder with a property regressor.

    The encoder maps a one-hot sequence to the mean and log-variance of the
    latent variable, the decoder reconstructs the sequence from a latent
    sample, and the regressor predicts one scalar property from that sample.

    The layer shapes read the module-level names ``x``, ``max_lens`` and
    ``num_words``, which must exist before the class is instantiated.
    """

    def __init__(self, latent_dim):
        super(VAEWithRegressor, self).__init__()
        # Define the encoder layers
        self.encoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=x[0].shape),
                layers.GRU(units=64, return_sequences=True),
                layers.BatchNormalization(),
                layers.GRU(units=32),
                layers.BatchNormalization(),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                # First half of the output is the mean, second half the log-variance.
                layers.Dense(latent_dim * 2),
            ]
        )
        # Define the decoder layers
        self.decoder = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=16),
                layers.BatchNormalization(),
                layers.Dense(units=32),
                layers.BatchNormalization(),
                layers.RepeatVector(max_lens),
                layers.GRU(units = max_lens, return_sequences=True),
                layers.BatchNormalization(),
                layers.TimeDistributed(layers.Dense(units=num_words)),
                layers.Activation('softmax')
            ]
        )
        # Define the regressor layers
        self.regressor = tf.keras.Sequential(
            [
                layers.InputLayer(input_shape=(latent_dim,)),
                layers.Dense(units=32),
                layers.Dense(units=16),
                layers.Dense(units=1),
            ]
        )
        # A single optimizer for the lifetime of the model. Creating a new
        # Adam instance on every step would discard its accumulated state.
        self._step_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    def encode(self, x, training=False):
        """Return the mean and log-variance of the latent variable for `x`."""
        h = self.encoder(x, training=training)
        mean, log_var = tf.split(h, num_or_size_splits=2, axis=1)
        return mean, log_var

    def reparameterize(self, mean, log_var):
        """Sample from the latent variable distribution: mean + std * eps."""
        eps = tf.random.normal(tf.shape(mean))
        std_dev = tf.exp(0.5 * log_var)
        z = mean + std_dev * eps
        return z

    def decode(self, z, training=False):
        """Reconstruct the input from the latent variable."""
        return self.decoder(z, training=training)

    def predict_properties(self, z, training=False):
        """Predict the property from the latent variable."""
        return self.regressor(z, training=training)

    def call(self, x, training=False):
        """Forward pass; returns (x_pred, mean, log_var, properties).

        `training` is forwarded to the sub-networks so that the
        BatchNormalization layers can run in training mode.
        """
        mean, log_var = self.encode(x, training=training)
        z = self.reparameterize(mean, log_var)
        x_pred = self.decode(z, training=training)
        properties = self.predict_properties(z, training=training)
        return x_pred, mean, log_var, properties

    def vae_loss(self, x, x_pred, mean, log_var):
        """Return the batch mean of reconstruction loss plus KL divergence."""
        # The decoder ends in a softmax over the vocabulary and the targets
        # are one-hot, so the per-position loss is categorical cross-entropy;
        # it is summed over the sequence positions.
        recon_loss = tf.reduce_sum(
            tf.keras.losses.categorical_crossentropy(x, x_pred), axis = 1)
        kl_loss = -0.5 * tf.reduce_sum(
            1 + log_var - tf.square(mean) - tf.exp(log_var), axis = 1)
        return tf.reduce_mean(recon_loss + kl_loss)

    def property_loss(self, y_true, y_pred):
        """Return the mean squared error between true and predicted properties."""
        y_pred = tf.convert_to_tensor(y_pred)
        # Give the labels the same shape as the predictions. A (batch,) label
        # vector against (batch, 1) predictions would broadcast to
        # (batch, batch) and push every prediction towards the batch mean.
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))
        return tf.reduce_mean(tf.keras.losses.mean_squared_error(y_true, y_pred))

    def train_step(self, x, y_true):
        """Run one optimisation step; returns (vae_loss, property_loss)."""
        with tf.GradientTape() as tape:
            # Go through __call__ with training=True instead of self.call(x).
            x_pred, mean, log_var, y_pred = self(x, training=True)
            vae_loss_value = self.vae_loss(x, x_pred, mean, log_var)
            property_loss_value = self.property_loss(y_true, y_pred)
            total_loss = vae_loss_value + property_loss_value
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self._step_optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return vae_loss_value, property_loss_value
latent_dim = 8
num_epochs = 50
batch_size = 256
vae = VAEWithRegressor(latent_dim)
x_train = x
y_train = y
# Number of mini-batches per epoch, counting the final partial batch, so the
# epoch averages divide by the number of values actually summed
# (len(x_train) / batch_size is fractional when the last batch is partial).
num_batches = -(-len(x_train) // batch_size)
for epoch in range(num_epochs):
    epoch_vae_loss = 0
    epoch_property_loss = 0
    for i in range(0, len(x_train), batch_size):
        x_batch = x_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        vae_loss_value, property_loss_value = vae.train_step(x_batch, y_batch)
        epoch_vae_loss += vae_loss_value
        epoch_property_loss += property_loss_value
    epoch_vae_loss /= num_batches
    epoch_property_loss /= num_batches
    print('Epoch {}, VAE loss: {}, Property loss: {}'.format(epoch+1, epoch_vae_loss, epoch_property_loss))
# The first latent_dim encoder outputs are the latent means (see encode()).
z_sample = vae.encoder.predict(x)[:,:latent_dim]
x_pred = np.array(vae.decoder.predict(z_sample))
y_pred = np.array(vae.predict_properties(z_sample))

How to handle target decoder inputs for self attention transformer model during predict()

My question is essentially a duplicate of this one, where I'm confused as to what to pass into the decoder during the predict() (i.e., call()) phase. I've modified tutorials found here and here in order to create this script. This is being used for the purposes of self-attention on a time series dataset for regression (not NLP).
There's too much boilerplate to provide the full model so I'll write in the pertinent script:
Transformer.py
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
# The following imports are my custom Layers/Functions
from Utilities.MachineLearning.Keras.Layers.Encoder import Encoder
from Utilities.MachineLearning.Keras.Layers.Decoder import Decoder
from Utilities.MachineLearning.Keras.Functions.etc import create_padding_mask, create_look_ahead_mask
def create_masks(input, target):
    """Build the attention masks for one (input, target) pair.

    Returns a 3-tuple, in this order:
      1. the padding mask of `input`, for the encoder;
      2. the element-wise maximum of the padding mask of `target` and the
         look-ahead mask, for the 1st attention block of the decoder;
      3. a second padding mask of `input`, for the 2nd attention block of
         the decoder, where it masks the encoder outputs.
    """
    # Encoder padding mask
    encoder_mask = create_padding_mask(input)

    # Same padding information, produced by a separate call for the decoder.
    decoder_mask = create_padding_mask(input)

    # Look-ahead mask sized by the second dimension of `target`, merged with
    # the target padding mask.
    target_length = tf.shape(target)[1]
    look_ahead_mask = create_look_ahead_mask(target_length)
    target_mask = create_padding_mask(target)
    encoder_decoder_mask = tf.maximum(target_mask, look_ahead_mask)

    return encoder_mask, encoder_decoder_mask, decoder_mask
class Transformer(Model):
    """Encoder/decoder model followed by a Dense output layer.

    `call` returns the tuple ``(output, attention_weights)``.
    """

    def __init__(
        self,
        num_inputs,
        num_outputs=1,
        num_heads=1,
        num_layers=1,
        num_embedding_inputs=None,
        num_ff_inputs=None,
        dropout=0,
        **kwargs,
    ):
        # Extra keyword arguments (for example `name=`) are forwarded to the
        # Keras base class, so callers may name the model without a TypeError.
        super().__init__(**kwargs)
        self.encoder = Encoder(
            num_inputs,
            num_heads,
            num_layers,
            num_embedding_inputs,
            num_ff_inputs,
            dropout,
        )
        self.decoder = Decoder(
            num_inputs,
            num_heads,
            num_layers,
            num_embedding_inputs,
            num_ff_inputs,
            dropout,
        )
        self.output_layer = Dense(num_outputs, name="Output")

    def call(
        self,
        inputs,
        targets,
        training=None,
    ):
        """Run encoder and decoder; return (output, attention_weights)."""
        encoder_mask, encoder_decoder_mask, decoder_mask = create_masks(inputs, targets)
        encoder_output = self.encoder(inputs, encoder_mask, training)
        decoder_output, attention_weights = self.decoder(
            targets, encoder_output, encoder_decoder_mask, decoder_mask, training
        )
        output = self.output_layer(decoder_output)
        return output, attention_weights

    train_step_signature = [
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    ]

    # @tf.function(input_signature=train_step_signature)
    def train_step(self, data):
        """Custom training step for `fit()`; `data` is an (x, y) pair."""
        x, y = data
        with tf.GradientTape() as tape:
            # The model returns (output, attention_weights); only the output
            # is a prediction that can be compared with y.
            y_pred, _ = self(x, y, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
SelfAttention.py
# Don't worry about what Custom is, it's basically a modified Keras Model
from Utilities.MachineLearning.Keras.Models.Custom import Custom
from Utilities.MachineLearning.Keras.Models.Transformer import Transformer
class SelfAttention(Custom):
    """Custom model that wraps a single Transformer."""

    def initialize(self):
        """Create the wrapped Transformer from this model's settings."""
        # The last entry of batch_input_shape is passed as the Transformer's
        # first argument (num_inputs).
        feature_count = self.batch_input_shape[-1]
        self.transformer = Transformer(
            feature_count,
            num_heads=self.attention_units,
            dropout=self.attention_dropout,
            name="Transformer",
        )

    def call(self, inputs, training=False):
        """Forward `inputs` to the wrapped Transformer."""
        # TODO: What about `targets`?
        result = self.transformer(inputs, training=training)
        return result
There was no point in using a decoder as all the relevant information for time series data is used by the encoder block.

How to write a custom call function for a Tensorflow LSTM class?

I have defined a custom LSTM Layer as follows:
class LSTMModel(tf.keras.Model):
    """Holds a CNN model, an LSTM layer and a softmax Dense layer.

    `call` is still a stub and returns None.
    """

    def __init__(self, CNN_model, num_classes):
        super().__init__()
        self.cnn_model = CNN_model
        self.lstm = tf.keras.layers.LSTM(
            units=64, return_state=True, dropout=0.3)
        self.dense = tf.keras.layers.Dense(
            num_classes, activation="softmax")

    def call(self, input):
        # Not implemented yet.
        return None
However, I am unclear what needs to occur in the call function here. I also wrote a generic CNN class like below:
class generic_vns_function(tf.keras.Model):
    """Stack of Conv2D + MaxPooling2D pairs followed by Flatten and Dense(1024).

    NOTE(review): assumes `layer_units` holds one filter count per entry of
    `layers` — confirm against the caller. The original loop passed the loop
    index as the filter count (0 filters for the first convolution) and never
    used `layer_units`.
    """

    def __init__(self, input_shape, layers, layer_units):
        super().__init__()
        self.convolutions = []
        # Dynamically create Convolutional layers and MaxPools
        for _, filters in zip(layers, layer_units):
            self.convolutions.append(tf.keras.layers.Conv2D(filters, 3, padding="same",
                                     input_shape=input_shape, activation="relu"))
            # Add MaxPooling layer
            self.convolutions.append(tf.keras.layers.MaxPooling2D((2,2)))
        # Flatten
        self.flatten = tf.keras.layers.Flatten()
        # Dense layer
        self.dense1 = tf.keras.layers.Dense(1024, activation="relu")

    def call(self, input):
        """Apply the convolution/pooling stack, then flatten and the dense layer."""
        x = input
        for layer in self.convolutions:
            x = layer(x)
        x = self.flatten(x)
        x = self.dense1(x)
        return x
but here the required structure makes a lot more sense to me. I am just initializing all of the layers. What do I need to do to initialize my LSTM layers?
You could write it like this:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import Model
class LSTMModel(Model):
    """LSTM layer followed by a softmax Dense classifier."""

    def __init__(self, num_classes, num_units=64, drop_prob=0.3):
        super().__init__()
        self.num_classes = num_classes
        self.num_units = num_units
        self.drop_prob = drop_prob
        self.lstm = LSTM(units=self.num_units,
                         return_state=True,
                         dropout=self.drop_prob)
        self.dense = Dense(num_classes, activation="softmax")

    def call(self, x, training=True):
        """Return the class probabilities computed from the LSTM output."""
        # With return_state=True the layer returns the output followed by
        # its states; only the output is passed on.
        lstm_results = self.lstm(x, training=training)
        lstm_output, *state = lstm_results
        probabilities = self.dense(lstm_output)
        return probabilities
And then you would use it like:
# Build a two-class model and run it once on a random (32, 64, 128) tensor.
model = LSTMModel(num_classes=2)
time_series = tf.random.normal((32, 64, 128))
x_pred = model(time_series)
# loss and gradients calculations ...
It is a common TensorFlow idiom to instantiate layers when initializing a custom layer/model, and then execute their call() methods by passing data through them in your custom call implementation.

Training multiple models defined from the same class in Tensorflow 2.0 fails when using @tf.function

I am using Tensorflow 2.1 to create custom models and custom training loops. My aim is to compare the accuracy of different configurations of my neural network. Specifically, in this case, I am comparing the reconstruction error of an AutoEncoder with varying latent dimension. Hence, I am training my network for one latent dimension then computing the test error and then I redo this process for another latent dimension, and so on. With this process I want to create plots like this:
Plot example:
To speed up the training I want to use the @tf.function decorator for the backpropagation part of my training loop. However, when I try to train several different networks, looping over the latent dimension, I get an error. See below:
ValueError: in converted code:
<ipython-input-19-78bafad21717>:41 grad *
loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-19-78bafad21717>:33 call *
x_enc = self.encoder(inp)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
<ipython-input-19-78bafad21717>:9 call *
x = self.dense1(inp)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:748 __call__
self._maybe_build(inputs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:2116 _maybe_build
self.build(input_shapes)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/layers/core.py:1113 build
trainable=True)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:446 add_weight
caching_device=caching_device)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/training/tracking/base.py:744 _add_variable_with_custom_getter
**kwargs_for_getter)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer_utils.py:142 make_variable
shape=variable_shape if variable_shape else None)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:258 __call__
return cls._variable_v1_call(*args, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:219 _variable_v1_call
shape=shape)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/variables.py:65 getter
return captured_getter(captured_previous, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/eager/def_function.py:502 invalid_creator_scope
"tf.function-decorated function tried to create "
ValueError: tf.function-decorated function tried to create variables on non-first call.
I do not get this error when I remove the @tf.function decorator. I believe it has something to do with Tensorflow creating a computational graph when I use the decorator, and this graph remaining when I create another instance of my network, thus raising an error because the old graph does not match the new instance of the network. But I am not sure about this at all, since I believe I am missing something fundamental about Tensorflow here!
Below is a very simple version of my code recreating the error. I have tried to remove all the unnecessary parts of the code to make it easier to read and debug. Furthermore, I am generating a very simple training and test set just for the sake of this question.
I have already tried the tf.keras.backend.clear_session() function without any luck.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Encoder
class build_encoder(tf.keras.Model):
    """Two ReLU Dense layers: 32 units, then `latent_dim` units."""

    def __init__(self,latent_dim):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(
            latent_dim, activation='relu', use_bias=True)

    def call(self, inp):
        """Encode `inp` by applying dense1, then dense2."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
# Decoder
class build_decoder(tf.keras.Model):
    """Two ReLU Dense layers: 32 units, then 10 units."""

    def __init__(self,):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(
            10, activation='relu', use_bias=True)

    def call(self, inp):
        """Decode `inp` by applying dense1, then dense2."""
        hidden = self.dense1(inp)
        return self.dense2(hidden)
# Full Autoencoder
class Autoencoder(tf.keras.Model):
    """Chains a build_encoder and a build_decoder."""

    def __init__(self,latent_dim=5):
        super().__init__()
        self.encoder = build_encoder(latent_dim)
        self.decoder = build_decoder()

    def call(self, inp):
        """Return the decoder output for the encoded `inp`."""
        code = self.encoder(inp)
        reconstruction = self.decoder(code)
        return reconstruction
#### Here is the backpropagation with @tf.function decorator ####
# @tf.function
def grad(model, inputs):
    """Return the reconstruction loss of `model` on `inputs` and its gradients.

    The gradients are taken with respect to `model.trainable_variables`.
    """
    with tf.GradientTape() as tape:
        reconstruction = model(inputs)
        loss_value = tf.losses.mean_squared_error(inputs, reconstruction)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
# Training loop function
def train(x_train, model, num_epochs, batch_size,optimizer):
    """Train `model` to reconstruct `x_train`; return the per-epoch train loss.

    Args:
        x_train: training samples; batches are taken along the first axis.
        model: the model to train.
        num_epochs: number of passes over `x_train`.
        batch_size: number of samples per gradient step.
        optimizer: optimizer used to apply the gradients.

    Returns:
        A list with the mean squared reconstruction error on the whole of
        `x_train` after each epoch.
    """
    train_loss = []
    for epoch in range(num_epochs):
        # tf.random.shuffle returns the shuffled data instead of shuffling in
        # place, so the result must be kept and used for batching.
        shuffled = tf.random.shuffle(x_train)
        for i in range(0, len(x_train), batch_size):
            x_inp = shuffled[i: i + batch_size]
            loss_value, grads = grad(model, x_inp)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_train, model(x_train))).numpy())
        if epoch % 100 == 0:
            print("Epoch: {}, Train loss: {:.9f}".format(epoch, train_loss[epoch]))
    return train_loss
#### Generating simple training and test data
num_train = 10000
num_test = 1000
x_train = s = np.random.uniform(0,1,(num_train,10)).astype(np.float32)
# The last four of the ten features are always zero.
x_train[:,6:10] = 0
x_test = s = np.random.uniform(0,1,(num_test,10)).astype(np.float32)
x_test[:,6:10] = 0
###
batch_size = 8
num_epochs = 10000
test_loss = []
# Looping over the latent dimensions
latent_dims = range(1,10)
for latent_dim in latent_dims:
    # Use the loop variable; the original listing hard-coded latent_dim=3,
    # so every iteration trained the same architecture.
    model = Autoencoder(latent_dim=latent_dim) # Creating an instance of my Autoencoder
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005) # Defining an optimizer
    train_loss = train(x_train, model=model, num_epochs=num_epochs, batch_size=batch_size, optimizer=optimizer) # Training the network
    test_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_test, model(x_test))).numpy())
plt.figure()
# Plot the test loss against the latent dimension it belongs to.
plt.plot(latent_dims,test_loss,linewidth=1.5)
plt.grid(True)
plt.show()
There's an error in the code snippet you provided.
I changed last Dense layer unit from 6 to 10.
# Decoder
class build_decoder(tf.keras.Model):
    """Decoder made of two ReLU Dense layers with 32 and 10 units."""

    def __init__(self,):
        super().__init__()
        relu_dense = dict(activation='relu', use_bias=True)
        self.dense1 = tf.keras.layers.Dense(32, **relu_dense)
        self.dense2 = tf.keras.layers.Dense(10, **relu_dense)

    def call(self, inp):
        """Apply dense1, then dense2, to `inp`."""
        return self.dense2(self.dense1(inp))
As for your question on training multiple models:
The error message "ValueError: tf.function-decorated function tried to create variables on non-first call" means that the function decorated with @tf.function is creating new variables on a call after its first one. This is not allowed, because the function has been turned into a graph.
I have modified your backpropagation method and commented out your original code so you can see the difference.
#### Here is the backpropagation with #tf.function decorator ####
# #tf.function
# def grad(model, inputs):
# with tf.GradientTape() as tape:
# loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
# return loss_value, tape.gradient(loss_value, model.trainable_variables)
# The '@' of the decorator had been turned into '#' in the listing, which
# silently left MSE undecorated; it is restored here.
@tf.function
def MSE(y_true, y_pred):
    """Return tf.keras.losses.MSE(y_true, y_pred)."""
    return tf.keras.losses.MSE(y_true, y_pred)


def backprop(inputs, model):
    """Return the reconstruction loss of `model` on `inputs` and its gradients.

    The model is called outside the tf.function-decorated MSE; the gradients
    are taken with respect to `model.trainable_variables`.
    """
    with tf.GradientTape() as tape:
        loss_value = MSE(inputs, model(inputs))
    return loss_value, tape.gradient(loss_value, model.trainable_variables)


def gradient_func(model, inputs):
    """Same as backprop, with the argument order (model, inputs)."""
    return backprop(inputs, model)
The main culprit in your original code was calling model(inputs) inside the function decorated with @tf.function: the decoration is inherited by every function called inside it, so the model's forward pass (and with it the creation of its variables) was traced into the graph as well. In the modified version only the loss function is decorated and optimized.
Another way to train multiple models without overwriting a single variable is to put them into an array.
# Index 0 holds a placeholder so that model_array[latent_dim] is the model
# trained with that latent dimension.
model_array = [0]
# Looping over the latent dimensions
for latent_dim in range(1,10):
    # Creating an instance of my Autoencoder
    model_array.append(Autoencoder(latent_dim))
    # Defining an optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
    # Training the network
    train_loss = train(
        x_train,
        model=model_array[latent_dim],
        num_epochs=num_epochs,
        batch_size=batch_size,
        optimizer=optimizer,
    )
    test_error = tf.losses.mean_squared_error(x_test, model_array[latent_dim](x_test))
    test_loss.append(tf.reduce_mean(test_error).numpy())
This arranges the models in an array, where they are easier to access and debug.
Here is the complete modified code.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Encoder
class build_encoder(tf.keras.Model):
    """Encoder made of two ReLU Dense layers with 32 and `latent_dim` units."""

    def __init__(self,latent_dim):
        super().__init__()
        relu_dense = dict(activation='relu', use_bias=True)
        self.dense1 = tf.keras.layers.Dense(32, **relu_dense)
        self.dense2 = tf.keras.layers.Dense(latent_dim, **relu_dense)

    def call(self, inp):
        """Apply dense1, then dense2, to `inp`."""
        return self.dense2(self.dense1(inp))
# Decoder
class build_decoder(tf.keras.Model):
    """Maps its input through Dense(32, relu) and Dense(10, relu)."""

    def __init__(self,):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(
            units=32, activation='relu', use_bias=True)
        self.dense2 = tf.keras.layers.Dense(
            units=10, activation='relu', use_bias=True)

    def call(self, inp):
        """Return dense2(dense1(inp))."""
        intermediate = self.dense1(inp)
        output = self.dense2(intermediate)
        return output
# Full Autoencoder
class Autoencoder(tf.keras.Model):
    """Full autoencoder: a build_encoder followed by a build_decoder."""

    def __init__(self,latent_dim=5):
        super().__init__()
        self.encoder = build_encoder(latent_dim)
        self.decoder = build_decoder()

    def call(self, inp):
        """Encode `inp`, then decode the result."""
        return self.decoder(self.encoder(inp))
#### Here is the backpropagation with #tf.function decorator ####
# #tf.function
# def grad(model, inputs):
# with tf.GradientTape() as tape:
# loss_value = tf.losses.mean_squared_error(inputs, model(inputs))
# return loss_value, tape.gradient(loss_value, model.trainable_variables)
# The '@' of the decorator had been turned into '#' in the listing, which
# silently left MSE undecorated; it is restored here.
@tf.function
def MSE(y_true, y_pred):
    """Return tf.keras.losses.MSE(y_true, y_pred)."""
    return tf.keras.losses.MSE(y_true, y_pred)


def backprop(inputs, model):
    """Return the reconstruction loss of `model` on `inputs` and its gradients.

    The model is called outside the tf.function-decorated MSE; the gradients
    are taken with respect to `model.trainable_variables`.
    """
    with tf.GradientTape() as tape:
        loss_value = MSE(inputs, model(inputs))
    return loss_value, tape.gradient(loss_value, model.trainable_variables)


def gradient_func(model, inputs):
    """Same as backprop, with the argument order (model, inputs)."""
    return backprop(inputs, model)
# Training loop function
def train(x_train, model, num_epochs, batch_size,optimizer):
    """Train `model` to reconstruct `x_train`; return the per-epoch train loss.

    Args:
        x_train: training samples; batches are taken along the first axis.
        model: the model to train.
        num_epochs: number of passes over `x_train`.
        batch_size: number of samples per gradient step.
        optimizer: optimizer used to apply the gradients.

    Returns:
        A list with the mean squared reconstruction error on the whole of
        `x_train` after each epoch.
    """
    train_loss = []
    for epoch in range(num_epochs):
        # tf.random.shuffle returns the shuffled data instead of shuffling in
        # place, so the result must be kept and used for batching.
        shuffled = tf.random.shuffle(x_train)
        for i in range(0, len(x_train), batch_size):
            x_inp = shuffled[i: i + batch_size]
            loss_value, grads = gradient_func(model, x_inp)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.append(tf.reduce_mean(tf.losses.mean_squared_error(x_train, model(x_train))).numpy())
        if epoch % 100 == 0:
            print("Epoch: {}, Train loss: {:.9f}".format(epoch, train_loss[epoch]))
    return train_loss
#### Generating simple training and test data
num_train = 10000
num_test = 1000
# Uniform samples in [0, 1) with ten features; the last four are zeroed.
x_train = s = np.random.uniform(0, 1, (num_train, 10)).astype(np.float32)
x_train[:, 6:10] = 0
x_test = s = np.random.uniform(0, 1, (num_test, 10)).astype(np.float32)
x_test[:, 6:10] = 0
###
batch_size = 8
num_epochs = 10000
test_loss = []
# Index 0 holds a placeholder so that model_array[latent_dim] is the model
# trained with that latent dimension.
model_array = [0]
# Looping over the latent dimensions
for latent_dim in range(1,10):
    # Creating an instance of my Autoencoder
    model_array.append(Autoencoder(latent_dim))
    # Defining an optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
    # Training the network
    train_loss = train(
        x_train,
        model=model_array[latent_dim],
        num_epochs=num_epochs,
        batch_size=batch_size,
        optimizer=optimizer,
    )
    test_error = tf.losses.mean_squared_error(x_test, model_array[latent_dim](x_test))
    test_loss.append(tf.reduce_mean(test_error).numpy())
plt.figure()
plt.plot(range(1,10), test_loss, linewidth=1.5)
plt.grid(True)
plt.show()
There is also a brief discussion about @tf.function and AutoGraph in the TF documentation at this link.
Feel free to ask questions and hope this helps you.

How to input csv data in an autoencoder

I am using the code below that implements an autoencoder. How can I feed the autoencoder with data for training and testing?
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
class Autoencoder(object):
    """Single-hidden-layer autoencoder built on the TF1 graph/session API.

    Args:
        n_input: number of features of one input row.
        n_hidden: number of units of the hidden layer.
        transfer_function: activation applied to the hidden layer.
        optimizer: optimizer whose `minimize` is applied to the cost. When
            omitted, a fresh `tf.train.AdamOptimizer()` is created for this
            instance (a default written in the signature would be
            instantiated once and shared by every instance).
    """

    def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus, optimizer=None):
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer()
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        network_weights = self._initialize_weights()
        self.weights = network_weights
        # model
        self.x = tf.placeholder(tf.float32, [None, self.n_input])
        self.hidden = self.transfer(tf.add(tf.matmul(self.x, self.weights['w1']), self.weights['b1']))
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
        # cost: half the summed squared reconstruction error
        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def _initialize_weights(self):
        """Create the encoder (w1, b1) and decoder (w2, b2) variables."""
        all_weights = dict()
        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32))
        return all_weights

    def partial_fit(self, X):
        """Run one optimisation step on batch `X` and return its cost."""
        cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict={self.x: X})
        return cost

    def calc_total_cost(self, X):
        """Return the cost on `X` without updating the weights."""
        return self.sess.run(self.cost, feed_dict = {self.x: X})

    def transform(self, X):
        """Return the hidden-layer activations for `X`."""
        return self.sess.run(self.hidden, feed_dict={self.x: X})

    def generate(self, hidden = None):
        """Decode `hidden`; when it is None, decode one random normal sample."""
        if hidden is None:
            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
        return self.sess.run(self.reconstruction, feed_dict={self.hidden: hidden})

    def reconstruct(self, X):
        """Return the reconstruction of `X`."""
        return self.sess.run(self.reconstruction, feed_dict={self.x: X})

    def getWeights(self):
        """Return the current value of the encoder weight matrix w1."""
        return self.sess.run(self.weights['w1'])

    def getBiases(self):
        """Return the current value of the encoder bias b1."""
        return self.sess.run(self.weights['b1'])
# I instantiate the class autoencoder, 5 is the dimension of a raw input,
# 2 is the dimension of the hidden layer
autoencoder = Autoencoder(5, 2, transfer_function=tf.nn.softplus,
                          optimizer=tf.train.AdamOptimizer())
# I prepare my data
IRIS_TRAINING = "C:\\Users\\Desktop\\iris_training.csv"
# Feeding data to Autoencoder ???
# Train and Test ??
How can I train this model with CSV file data? I think I need to run an instruction such as _, c = sess.run([optimizer, cost], feed_dict={self.x: batch_of_data}) inside a loop over epochs, but I am struggling with it.
Check out Stanford CS20SI's tutorial.
https://github.com/chiphuyen/tf-stanford-tutorials/blob/master/examples/05_csv_reader.py