Stateful LSTM VAE: Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [batch_size, latent_dim] - tensorflow

I am solving a time-series problem using an LSTM VAE (variational autoencoder). I have built my VAE model as below:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback
from tensorflow.keras.layers import (Dense, Dropout, Input, Lambda, LeakyReLU,
                                     LSTM, RepeatVector, TimeDistributed)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

tf.compat.v1.disable_eager_execution()
class VAE:
    def __init__(self,
                 hidden_layer_units,
                 hidden_layer_leakyrelu_alphas,
                 hidden_layer_dropout_rates,
                 batch_size,
                 time_steps,
                 num_features,
                 is_stateful_learning):
        self.hidden_layer_units = hidden_layer_units
        self.hidden_layer_leakyrelu_alphas = hidden_layer_leakyrelu_alphas
        self.hidden_layer_dropout_rates = hidden_layer_dropout_rates
        self.encoder_num_layers = 0
        self.latent_space_dim = 0
        vae_total_layers = len(hidden_layer_units)
        if 0 < vae_total_layers:
            self.encoder_num_layers = int((vae_total_layers - 1) / 2)
            self.latent_space_dim = self.hidden_layer_units[self.encoder_num_layers]
        self.batch_size = batch_size
        self.time_steps = time_steps
        self.num_features = num_features
        self.is_stateful_learning = is_stateful_learning
        self.encoder = None
        self.decoder = None
        self.model = None
        self.model_input = None
        self.model_output = None
        self.mu = None
        self.log_variance = None
        self.kulback_coef = 0.0001
        self._build()

    def summary(self):
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.001):
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=[self._calculate_reconstruction_loss, self._calculate_kl_loss])

    def _build(self):
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    def _build_encoder(self):
        encoder_input = self._add_encoder_input()
        lstm_layers = self._add_encoder_lstm_layers(encoder_input)
        bottleneck = self._add_bottleneck(lstm_layers)
        self.model_input = encoder_input
        self.encoder = Model(encoder_input, bottleneck, name="encoder")

    def _build_decoder(self):
        decoder_input = self._add_decoder_input()
        repeater_layer = self._add_repeater_layer(decoder_input)
        lstm_layer = self._add_decoder_lstm_layer(repeater_layer)
        decoder_output = self._add_decoder_output(lstm_layer)
        self.decoder = Model(decoder_input, decoder_output, name="decoder")

    def _build_autoencoder(self):
        model_input = self.model_input
        encoder_output = self.encoder(model_input)
        model_output = self.decoder(encoder_output)
        self.model_output = model_output
        self.model = Model(model_input, model_output, name="autoencoder")

    def _add_encoder_input(self):
        if self.is_stateful_learning:
            x = Input(batch_shape=(self.batch_size, self.time_steps, self.num_features), name="encoder_input")
        else:
            x = Input(shape=(self.time_steps, self.num_features), name="encoder_input")
        return x

    def _add_encoder_lstm_layers(self, encoder_input):
        """Create all LSTM layers in the encoder."""
        x = encoder_input
        for layer_index, units in enumerate(self.hidden_layer_units[:self.encoder_num_layers]):
            lstm_params = {}
            if layer_index < self.encoder_num_layers - 1:
                lstm_params["return_sequences"] = True
            if self.is_stateful_learning:
                lstm_params["stateful"] = True
            x = LSTM(units=units, **lstm_params)(x)
            x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_index])(x)
            x = Dropout(rate=self.hidden_layer_dropout_rates[layer_index])(x)
        return x

    def _add_bottleneck(self, x):
        """Add the bottleneck with Gaussian sampling (Dense layers)."""
        self.mu = Dense(self.latent_space_dim, name="mu")(x)
        self.log_variance = Dense(self.latent_space_dim, name="log_variance")(x)
        x = Lambda(self.sample_point_from_normal_distribution, name="encoder_output")([self.mu, self.log_variance])
        return x

    def sample_point_from_normal_distribution(self, args):
        mu, log_variance = args
        epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
        sampled_point = mu + K.exp(log_variance / 2) * epsilon
        return sampled_point

    def _add_decoder_input(self):
        if self.is_stateful_learning:
            x = Input(batch_shape=(self.batch_size, self.latent_space_dim), name="decoder_input")
        else:
            x = Input(shape=(self.latent_space_dim,), name="decoder_input")
        return x

    def _add_repeater_layer(self, decoder_input):
        return RepeatVector(self.time_steps)(decoder_input)

    def _add_decoder_lstm_layer(self, repeater_layer):
        x = repeater_layer
        for layer_index, units in enumerate(self.hidden_layer_units[self.encoder_num_layers + 1:]):
            lstm_params = {}
            if self.is_stateful_learning:
                # stateful build
                lstm_params = {'stateful': True, 'return_sequences': True}
            else:
                lstm_params["return_sequences"] = True
            layer_no = layer_index + self.encoder_num_layers + 1
            x = LSTM(units=units, **lstm_params)(x)
            x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_no])(x)
            x = Dropout(rate=self.hidden_layer_dropout_rates[layer_no])(x)
        return x

    def _add_decoder_output(self, lstm_layer):
        return TimeDistributed(Dense(1))(lstm_layer)

    def _calculate_combined_loss(self, y_target, y_predicted):
        reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
        kl_loss = self._calculate_kl_loss(y_target, y_predicted)
        combined_loss = reconstruction_loss + (self.kulback_coef * kl_loss)
        return combined_loss

    def _calculate_reconstruction_loss(self, y_target, y_predicted):
        error = y_target - y_predicted
        reconstruction_loss = K.mean(K.square(error), axis=1)
        return reconstruction_loss

    def _calculate_kl_loss(self, y_target, y_predicted):
        kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) - K.exp(self.log_variance), axis=1)
        return kl_loss

# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
                              number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
    vae = VAE(
        hidden_layer_units=lstm_layer_units,
        hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
        hidden_layer_dropout_rates=dropout_layer_rates,
        batch_size=number_of_sequences,
        time_steps=time_steps,
        num_features=data_dim,
        is_stateful_learning=is_stateful_learning
    )
    vae.compile(learning_rate)
    vae.summary()
    return vae.model
The model training block looks like this:
# configuration
nn_lstm_layer_units = [160, 3, 160]
nn_leakyrelu_layer_alphas = [0.0, 0.0, 0.0]
nn_dropout_layer_rates = [0.3, 0.0, 0.3]
batch_size = 96
win_length = 64
num_features = 6  # You can use single-variate time-series data as well, num_features = 1
epochs = 782
learning_rate = 0.0001
want_stateful_learning = True

# Build LSTM VAE model
model = build_lstm_neural_network(nn_lstm_layer_units, nn_leakyrelu_layer_alphas, nn_dropout_layer_rates,
                                  batch_size, win_length, num_features, want_stateful_learning)

TIME_STEPS = win_length

# Generate training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i: (i + time_steps)])
    return np.stack(output)

x_train = create_sequences(x_train)
x_val = create_sequences(x_val)

callbacks = []
unfit_train_record_count = 0
unfit_val_record_count = 0
if want_stateful_learning:
    # stateful learning
    # adjust train data size (should be in multiples of batch size)
    unfit_train_record_count = len(x_train) % batch_size
    unfit_val_record_count = len(x_val) % batch_size
    # Reset states of the stateful model on epoch end
    stateful_model_reset_states = LambdaCallback(on_epoch_end=lambda batch, logs: model.reset_states())
    callbacks.append(stateful_model_reset_states)

early_stopping = EarlyStopping(monitor=monitor, patience=patience)
callbacks.append(early_stopping)

# Model training
history = model.fit(x=x_train[unfit_train_record_count:],
                    y=x_train[unfit_train_record_count:, :, [0]],
                    validation_data=(x_val[unfit_val_record_count:], x_val[unfit_val_record_count:, :, [0]]),
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=False,
                    callbacks=callbacks)
The stateless mode of the model works as expected, but the stateful mode throws the error below:
1632/1632 [==============================] - ETA: 0s - loss: 0.2447 - _calculate_reconstruction_loss: 0.2447 - _calculate_kl_loss: 0.0326
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
[[{{node decoder_input}}]]
[[metrics/_calculate_reconstruction_loss/Identity/_229]]
(1) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
[[{{node decoder_input}}]]
The environment used:
Python 3.8.12,
tensorflow-gpu 2.5,
cuDNN 8.2.1.32
I am not clear why the stateful model runs one epoch over the training data but throws the error as soon as it starts to process the validation data.

I have had the same experience when the dataset and the loss function are not suited to each other. When I tried to simulate it, the possible outcomes were: no change in the loss value, the loss becoming NaN, or an error during validation. That can happen when there is no value, no match, or the neurons are not updated; TensorFlow 2.x makes this a lot easier to work with.
One possibility is a validation mismatch: training works, but validation results in errors:
Epoch 1/100
2022-01-23 21:04:59.846791: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - ETA: 0s - loss: 3.1866 - accuracy: 0.0000e+00Traceback (most recent call last):
Another possibility is that the loss function does not match the problem: it is possible the neurons are not being updated:
Epoch 1/100
2022-01-23 21:08:23.330068: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - 3s 3s/step - loss: 13.7138 - accuracy: 0.2000 - val_loss: 8.2133 - val_accuracy: 0.0000e+00
Epoch 2/100
1/1 [==============================] - 0s 65ms/step - loss: 7.7745 - accuracy: 0.0000e+00 - val_loss: 8.0456 - val_accuracy: 0.0000e+00

I solved the problem by changing the loss-calculation logic: instead of defining functions to calculate the reconstruction and KL losses inside the VAE class, I moved the loss calculation outside the class, as below.
# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
                              number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
    vae = VAE(
        hidden_layer_units=lstm_layer_units,
        hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
        hidden_layer_dropout_rates=dropout_layer_rates,
        batch_size=number_of_sequences,
        time_steps=time_steps,
        num_features=data_dim,
        is_stateful_learning=is_stateful_learning
    )

    # Add reconstruction loss
    error = vae.model_input - vae.model_output
    reconstruction_loss = K.mean(K.square(error))
    vae.model.add_loss(reconstruction_loss)
    vae.model.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')

    # Add KL loss
    kl_loss = kl_beta * K.mean(-0.5 * K.sum(1 + vae.log_variance - K.square(vae.mu) - K.exp(vae.log_variance), axis=1), axis=0)
    vae.model.add_loss(kl_loss)
    vae.model.add_metric(kl_loss, name='kl_loss', aggregation='mean')

    optimizer = Adam(learning_rate=vae.learning_rate, clipvalue=vae.clipvalue)
    vae.model.compile(loss=None, optimizer=optimizer)
    vae.summary()
    return vae.model
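For reference, a minimal training sketch for this version of the model (my assumption: kl_beta, the optimizer settings, and the windowed x_train/x_val from the question are defined as before). Because the losses are attached with add_loss() and the model is compiled with loss=None, fit() does not need an explicit target:
model = build_lstm_neural_network(nn_lstm_layer_units, nn_leakyrelu_layer_alphas, nn_dropout_layer_rates,
                                  batch_size, win_length, num_features, want_stateful_learning)
# The reconstruction term is built from model_input/model_output inside the
# function above, so only the input windows are passed to fit().
history = model.fit(x=x_train[unfit_train_record_count:],
                    validation_data=(x_val[unfit_val_record_count:], None),
                    batch_size=batch_size, epochs=epochs, shuffle=False, callbacks=callbacks)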

Related

Tensorflow with custom loss containing multiple inputs - Graph disconnected error

I have a CNN that outputs a scalar; this output is concatenated with the output of an MLP and then fed to another dense layer. I get a graph-disconnected error.
Please advise as to how to fix this. Thanks in advance.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, concatenate, Input
import tensorflow as tf
tf.keras.backend.clear_session()
#----custom function
def custom_loss(ytrue, ypred):
    loss = tf.math.log(1. + ytrue) - tf.math.log(1. + ypred)
    loss = tf.math.square(loss)
    loss = tf.math.reduce_mean(loss)
    return loss
#------------------
cnnin = Input(shape=(10, 10, 1))
x = Conv2D(8, 4)(cnnin)
x = Conv2D(16, 4)(x)
x = Conv2D(32, 2)(x)
x = Conv2D(64, 2)(x)
x = Flatten()(x)
x = Dense(4)(x)
x = Dense(4, activation="relu")(x)
cnnout = Dense(1, activation="linear")(x)
cnnmodel= Model(cnnin, cnnout, name="cnn_model")
yt = Input(shape=(2, )) #---dummy input
#---mlp start
mlpin = Input(shape=(2, ), name="mlp_input")
z = Dense(4, activation="sigmoid")(mlpin)
z = Dense(4, activation = "softmax")(z)
mlpout = Dense(1, activation="linear")(z)
mlpmodel = Model(mlpin, mlpout, name="mlp_model")
#----concatenate
combinedout = concatenate([mlpmodel.output, cnnmodel.output ])
x = Dense(4, activation="sigmoid")(combinedout)
finalout = Dense(2, activation="linear")(x)
model = Model( [mlpin, cnnin], finalout)
model.add_loss(custom_loss(yt, finalout))
model.compile(optimizer='adam', learning_rate=1e-3, initialization="glorotnorm",
loss=None)
Graph disconnected: cannot obtain value for tensor Tensor("input_8:0", shape=(None, 2), dtype=float32) at layer "input_8". The following previous layers were accessed without issue: ['input_7', 'conv2d_12', 'conv2d_13', 'conv2d_14', 'conv2d_15', 'flatten_3', 'mlp_input', 'dense_24', 'dense_27', 'dense_25', 'dense_28', 'dense_29', 'dense_26', 'concatenate_3', 'dense_30', 'dense_31']
You can customize what happens in Model.fit, as described in https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit:
We create a new class that subclasses keras.Model.
We just override the method train_step(self, data).
We return a dictionary mapping metric names (including the loss) to their current value.
For example, with your models:
loss_tracker = tf.keras.metrics.Mean(name="custom_loss")

class TestModel(tf.keras.Model):
    def __init__(self, model1):
        super(TestModel, self).__init__()
        self.model1 = model1

    def compile(self, optimizer):
        super(TestModel, self).compile()
        self.optimizer = optimizer

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            ypred = self.model1([x], training=True)
            loss_value = custom_loss(y, ypred)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss_value, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        loss_tracker.update_state(loss_value)
        return {"loss": loss_tracker.result()}

import numpy as np
x = np.random.rand(6, 10, 10, 1)
x2 = np.random.rand(6, 2)
y = tf.ones((6, 2))
model = Model([mlpin, cnnin], finalout)
trainable_model = TestModel(model)
trainable_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001))
trainable_model.fit(x=(x2, x), y=y, epochs=5)
Gives the following output:
Epoch 1/5
1/1 [==============================] - 0s 382ms/step - loss: 0.2641
Epoch 2/5
1/1 [==============================] - 0s 4ms/step - loss: 0.2640
Epoch 3/5
1/1 [==============================] - 0s 6ms/step - loss: 0.2638
Epoch 4/5
1/1 [==============================] - 0s 7ms/step - loss: 0.2635
Epoch 5/5
1/1 [==============================] - 0s 6ms/step - loss: 0.2632
<tensorflow.python.keras.callbacks.History at 0x14c69572688>
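If the model should also be evaluated on validation data inside fit(), a test_step can be overridden in the same style. A minimal sketch (my addition, reusing the custom_loss and loss_tracker defined above) that would go inside the TestModel class:
    def test_step(self, data):
        # Mirrors train_step but without gradient updates; used by
        # fit(validation_data=...) and by evaluate().
        x, y = data
        ypred = self.model1([x], training=False)
        loss_value = custom_loss(y, ypred)
        loss_tracker.update_state(loss_value)
        return {"loss": loss_tracker.result()}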

How is add_loss and compile's loss combined for the gradient calculation?

You can specify the loss in a Keras TensorFlow model in two ways: you can use add_loss, and you can also specify the loss in compile's loss argument. Since the gradient is taken with respect to some loss in order to do the weight updates, I would imagine there needs to be a single function that somehow combines those losses into one. Are they just added together?
For example, let's say I have the following model. The only important lines are
self.add_loss(kl_loss) and autoencoder.compile(optimizer=optimizer, loss=r_loss, metrics=[r_loss]).
class Autoencoder(Model):
    def __init__(self):
        super(Autoencoder, self).__init__()
        encoder_input = layers.Input(shape=INPUT_SHAPE, name='encoder_input')
        x = encoder_input
        # ...
        x = layers.Flatten()(x)
        mu = layers.Dense(LATENT_DIM, name='mu')(x)
        log_var = layers.Dense(LATENT_DIM, name='log_var')(x)

        def sample(args):
            mu, log_var = args
            epsilon = tf.random.normal(shape=K.shape(mu), mean=0., stddev=1.)
            return mu + tf.math.exp(log_var / 2) * epsilon

        encoder_output = layers.Lambda(sample, name='encoder_output')([mu, log_var])
        self.encoder = Model(encoder_input, outputs=[encoder_output, mu, log_var])
        self.decoder = tf.keras.Sequential([
            layers.Input(shape=LATENT_DIM),
            # ...
        ])

    def call(self, x):
        encoded, mu, log_var = self.encoder(x)
        kl_loss = tf.math.reduce_mean(-0.5 * tf.math.reduce_sum(1 + log_var - tf.math.square(mu) - tf.math.exp(log_var)))
        self.add_loss(kl_loss)
        decoded = self.decoder(encoded)
        return decoded

def train_autoencoder():
    autoencoder = Autoencoder()

    def r_loss(y_true, y_pred):
        return tf.math.reduce_sum(tf.math.square(y_true - y_pred), axis=[1, 2, 3])

    optimizer = tf.keras.optimizers.Adam(1e-4)
    autoencoder.compile(optimizer=optimizer, loss=r_loss, metrics=[r_loss])
When I train my model, I see the following values:
Epoch 00001: saving model to models/autoencoder/cp-autoencoder.ckpt
1272/1272 [==============================] - 249s 191ms/step - batch: 635.5000 - size: 1.0000 - loss: 5300.4540 - r_loss: 2856.8228
Both losses go down together. What exactly is the loss in the above snippet?
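As a rough sketch of the mechanics (this mirrors Keras's documented behaviour that losses registered with add_loss are summed with the compiled loss; it is not the actual implementation, and x_batch is a hypothetical batch of inputs):
with tf.GradientTape() as tape:
    decoded = autoencoder(x_batch, training=True)       # call() registers kl_loss via add_loss
    rec = tf.reduce_mean(r_loss(x_batch, decoded))      # the loss passed to compile()
    total_loss = rec + tf.add_n(autoencoder.losses)     # add_loss terms are added on top
grads = tape.gradient(total_loss, autoencoder.trainable_variables)
So the reported loss is the sum of the reconstruction term and the KL term, while the r_loss metric shows only the reconstruction part.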

How to fix the fetch argument error when implementing a Bayesian neural network with TensorFlow

placeholder_X = tf.placeholder(tf.float32, shape = [None, 19])
placeholder_y = tf.placeholder(tf.float32, shape = [None,1])
#Build an iterator over training batches
#training_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
training_dataset = tf.data.Dataset.from_tensor_slices((placeholder_X, placeholder_y))
#Shuffle the dataset (note shuffle argument much larger than training size).learning_rate # shuffling of data
# and form batches of size batch_size
training_batches = training_dataset.shuffle(20000, reshuffle_each_iteration =True).repeat().batch(FLAGS.batch_size)
#training_iterator = tf.data.make_one_shot_iterator(training_batches)
#Building iterator over the heldout set with batch_size = heldout_size,
# i.e., return the entire heldout set as a constant.
val_dataset = tf.data.Dataset.from_tensor_slices((placeholder_X, placeholder_y))
val_batches = val_dataset.repeat().batch(500)
#heldout_iterator = tf.data.make_one_shot_iterator(heldout_batches)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test,y_test))
test_dataset = test_dataset.batch(500)
#Combine these into a feasible iterator that can switch between training
# and validation inputs.
# Here should be minibatch increment be defined
handle = tf.placeholder(tf.string, shape = [])
feedable_iterator = tf.data.Iterator.from_string_handle(handle, training_batches.output_types, training_batches.output_shapes)
features_final, labels_final = feedable_iterator.get_next()
#create Reinitializable iterator for Train and Validation, one hot iterator for Test
train_val_iterator = tf.data.Iterator.from_structure(training_batches.output_types, training_batches.output_shapes)
training_iterator = train_val_iterator.make_initializer(training_batches)
val_iterator = train_val_iterator.make_initializer(val_batches)
test_iterator = test_dataset.make_one_shot_iterator()
def main(argv):
# extract the activation function from the hyperopt spec as an attribute from the tf.nn module
#activation = getattr(tf.nn, FLAGS.activation_function)
# define the graph
#with tf.Graph().as_default():
# Building the Bayesian Neural Network
# we are Gaussian Reparametrization Trick
# to compute the stochastic gradients as described in the paper
with tf.compat.v1.name_scope("bayesian_neural_net", values =[features_final]):
neural_net = tf.keras.Sequential()
for i in range(FLAGS.num_hidden_layers):
layer = tfp.layers.DenseReparameterization(
units = 10,
activation = tf.nn.relu,
trainable = True,
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
#kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(), # softplus(sigma)
kernel_posterior_tensor_fn=lambda x: x.sample(),
bias_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
bias_posterior_tensor_fn=lambda x: x.sample()
)
neural_net.add(layer)
neural_net.add(tfp.layers.DenseReparameterization(
units=2, # one dimensional output
activation= tf.nn.softmax, # since regression (outcome not bounded)
trainable=True, # i.e subject to optimization
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag with hyperopt sigma
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
kernel_posterior_tensor_fn=lambda x: x.sample(),
bias_prior_fn =tfp.layers.default_multivariate_normal_fn, # NormalDiag with hyperopt sigma
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
bias_posterior_tensor_fn=lambda x: x.sample()
))
logits = neural_net(features_final)
#labels_distribution = tfd.Bernoulli(logits=logits)
labels_distribution = tfd.Categorical(logits=logits)
#labels_distribution = tfd.Bernoulli(logits=logits)
# Perform KL annealing. The optimal number of annealing steps
# depends on the dataset and architecture.
t = tf.Variable(0.0)
kl_regularizer = t / (FLAGS.kl_annealing * len(X_train) / FLAGS.batch_size)
#Compute the -ELBO as the loss. The kl term is annealed from 1 to 1 over
# the epochs specified by the kl_annealing flag.
log_likelihood = labels_distribution.log_prob(labels_final)
#neg_log_likelihood = tf.reduce_mean(tf.squared_difference(logits,labels_final))
neg_log_likelihood = -tf.reduce_mean(input_tensor = log_likelihood)
kl = sum(neural_net.losses)/len(X_train) * tf.minimum(1.0, kl_regularizer)
elbo_loss = neg_log_likelihood + kl
# Build metrics for evaluation. Predictions are formed from single forward
# pass of the probablisitic layers . They are cheap but noisy predictions
predictions = tf.argmax(input = logits, axis=1)
predictions = tf.cast(predictions, tf.float32)
# TP, TN, FP, FN
TP = tf.count_nonzero(predictions * labels_final)
TN = tf.count_nonzero((predictions - 1) * (labels_final - 1))
FP = tf.count_nonzero(predictions * (labels_final - 1))
FN = tf.count_nonzero((predictions - 1) * labels_final)
# precision, recall, f1
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * precision * recall / (precision + recall)
tpr = TP/(TP+FN)
fpr = FP/(TP+FN)
#create Reinitializable iterator for Train and Validation, one hot iterator for Test
train_val_iterator = tf.data.Iterator.from_structure(training_batches.output_types, training_batches.output_shapes)
training_iterator = train_val_iterator.make_initializer(training_batches)
val_iterator = train_val_iterator.make_initializer(val_batches)
test_iterator = test_dataset.make_one_shot_iterator()
with tf.compat.v1.name_scope("train"):
train_accuracy, train_accuracy_update_op = tf.metrics.accuracy(labels=labels_final,predictions =predictions)
opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
train_op = opt.minimize(elbo_loss)
update_step_op = tf.assign(t, t+1)
with tf.compat.v1.name_scope("valid"):
valid_accuracy, validation_accuracy_update_op = tf.metrics.accuracy(labels= labels_final,predictions = predictions)
with tf.compat.v1.name_scope("test"):
test_accuracy, test_accuracy_update_op = tf.metrics.accuracy(labels = labels_final,predictions = predictions)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
saver = tf.train.Saver()
stream_vars_valid = [ v for v in tf.local_variables() if "valid" in v.name]
reset_valid_op = tf.variables_initializer(stream_vars_valid)
valid_accuracy_summary = []
stop_early =0
with tf.compat.v1.Session() as sess:
sess.run(init_op)
# Run the training loop
train_val_string, test_string = sess.run([
train_val_iterator.string_handle(),
test_iterator.string_handle()])
training_steps = int(round(FLAGS.epochs * (len(X_train) / FLAGS.batch_size)))
for step in range(training_steps):
#start reininitializable's train iterator
sess.run(training_iterator, feed_dict = {placeholder_X:X_train, placeholder_y:y_train})
#
_ = sess.run([train_op,train_accuracy_update_op, update_step_op],feed_dict={handle: train_val_string})
# Manually print the frequency
if step % 100 == 0:
save_path = saver.save(sess, "/tmp/my_model.ckpt")
loss_value, accuracy_value, kl_value = sess.run([elbo_loss, train_accuracy, kl], feed_dict= {handle: train_val_string})
print("Step:{:>3d} loss : {:.3f} KL: {:.3f}" .format(step , loss_value, accuracy_value, kl_value))
if (step +1) % FLAGS.eval_freq ==0:
# Compute log prob of heldout set by averaging draws from the model:
# p(heldout | train) = int_model p(heldout|model) p(model|train) ~= 1/n * sum_{i=1}^n p(heldout | model_i)
# where model_i is a draw from the posterior
#p(model|train)
probs = np.asarray([sess.run((labels_distribution.probs),
feed_dict ={handle: train_val_string})
for _ in range(FLAGS.num_monte_carlo)])
mean_probs = np.mean(probs, axis =0).astype(np.int32)
print(mean_probs.dtype)
_, label_vals = sess.run((features_final, labels_final), feed_dict = {handle: train_val_string})
label_vals = (label_vals).astype(np.int32)
heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]), label_vals]))
print(" ...Held_out nats: {:.3f}".format(heldout_lp))
# Calculate validation accuracy
for step in range(10):
#start reinitializable's validation iterator
sess.run(val_iterator, feed_dict = {placeholder_X:X_val, placeholder_y:y_val})
sess.run(validation_accuracy_update_op, feed_dict={handle:train_val_string})
valid_value = sess.run(valid_accuracy, feed_dict={handle:train_val_string})
valid_accuracy_summary.append(valid_value)
if valid_value < max(valid_accuracy_summary) and step > 100:
stop_early += 1
if stop_early == 40:
break
else:
stop_early = 0
print("Validation Accuracy: {:.3f}".format(valid_value))
sess.run(reset_valid_op)
#Feed to r=feedable iterator the string handle
test_value, precision_value, recall_value, fpr_value, tpr_value,f1 = sess.run([test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
print("Step: {:>3d} test Accuracy: {:.3f} Precision: {:.3f} Recall: {:.3f} ".format(step, test_value, precision_value, recall_value))
print("Step: {:>3d} fpr: {:.3f} tpr: {:.3f} f1_1: {:.3f}".format( step, fpr_value, tpr_value,f1))
if __name__ == "__main__":
tf.compat.v1.app.run()
I expect the output to progress, but it gives this error:
Step: 0 loss : 0.646 KL: 0.875
Step:100 loss : 0.654 KL: 0.904
Step:200 loss : 0.657 KL: 0.906
Step:300 loss : 0.648 KL: 0.906
int32
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:137: RuntimeWarning: divide by zero encountered in log
...Held_out nats: -inf
Validation Accuracy: 0.914
Step: 9 test Accuracy: 0.000 Precision: 0.910 Recall: 1.000
Step: 9 fpr: 0.099 tpr: 1.000 f1_1: 0.953
Step:400 loss : 0.624 KL: 0.906
Step:500 loss : 0.641 KL: 0.906
Step:600 loss : 0.612 KL: 0.906
Step:700 loss : 0.579 KL: 0.906
int32
...Held_out nats: -inf
Validation Accuracy: 0.914
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in __init__(self, fetches, contraction_fn)
302 self._unique_fetches.append(ops.get_default_graph().as_graph_element(
--> 303 fetch, allow_tensor=True, allow_operation=True))
304 except TypeError as e:
14 frames
TypeError: Can not convert a float64 into a Tensor or Operation.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in __init__(self, fetches, contraction_fn)
305 raise TypeError('Fetch argument %r has invalid type %r, '
306 'must be a string or Tensor. (%s)' %
--> 307 (fetch, type(fetch), str(e)))
308 except ValueError as e:
309 raise ValueError('Fetch argument %r cannot be interpreted as a '
The exception arises because the name f1 is reused as an assignment target: the first time the line runs, the f1 tensor is fetched and then rebound to a plain float, so the next sess.run call receives a float instead of a tensor. Rename the variable on the left-hand side.
test_value, precision_value, recall_value, fpr_value, tpr_value,f1 = sess.run([test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
change the line to
test_value, precision_value, recall_value, fpr_value, tpr_value,f1_value = sess.run([test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
Hopefully, this will work.
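A toy reproduction of the underlying issue (my own illustration, not from the original code):
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

f1 = tf.constant(0.5, dtype=tf.float64)   # stands in for the f1 metric tensor
with tf.Session() as sess:
    f1 = sess.run(f1)   # first call succeeds, but f1 is now a numpy float64
    sess.run(f1)        # TypeError: Can not convert a float64 into a Tensor or Operation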

MXNET custom symbol loss with gluon

I wrote this code (almost all of it is from a tutorial; I just modified a few lines), and it is not working.
import mxnet as mx
import numpy as np
from mxnet import autograd, gluon, nd
from mxnet.gluon import nn

np.random.seed(42)
mx.random.seed(42)
ctx = mx.gpu()

def data_xform(data):
    """Move channel axis to the beginning, cast to float32, and normalize to [0, 1]."""
    return nd.moveaxis(data, 2, 0).astype('float32') / 255

# prepare data
train_data = mx.gluon.data.vision.MNIST(train=True).transform_first(data_xform)
val_data = mx.gluon.data.vision.MNIST(train=False).transform_first(data_xform)
batch_size = 100
train_loader = mx.gluon.data.DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = mx.gluon.data.DataLoader(val_data, shuffle=False, batch_size=batch_size)

# create network
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
net = gluon.SymbolBlock(outputs=[fc3], inputs=[data])
net.initialize(ctx=ctx)

# create trainer, metric
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.1, 'momentum': 0.9, 'wd': 0.00001},
)
metric = mx.metric.Accuracy()

# learn
num_epochs = 10
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        inputs = inputs.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        with autograd.record():
            outputs = net(inputs)
            # softmax
            exps = nd.exp(outputs - outputs.min(axis=1).reshape((-1, 1)))
            exps = exps / exps.sum(axis=1).reshape((-1, 1))
            # cross entropy
            loss = nd.MakeLoss(-nd.log(exps.pick(labels)))
            #
            #loss = gluon.loss.SoftmaxCrossEntropyLoss()(outputs, labels)
            #print(loss)
        loss.backward()
        metric.update(labels, outputs)
        trainer.step(batch_size=inputs.shape[0])
    name, acc = metric.get()
    print('After epoch {}: {} = {}'.format(epoch + 1, name, acc))
    metric.reset()
If I use gluon.loss.SoftmaxCrossEntropyLoss, this runs well.
When I print the loss in both cases, the output values look the same.
What is the difference?
Thanks in advance.
I am not entirely sure why you subtract outputs.min() when calculating the softmax. The original softmax function doesn't do anything like that - https://en.wikipedia.org/wiki/Softmax_function. If you don't do that, you will get a good accuracy value:
# softmax
exps = nd.exp(outputs)
exps = exps / exps.sum(axis=1).reshape((-1, 1))
# cross entropy
loss = nd.MakeLoss(-nd.log(exps.pick(labels)))
I get:
After epoch 1: accuracy = 0.89545
After epoch 2: accuracy = 0.9639
After epoch 3: accuracy = 0.97395
After epoch 4: accuracy = 0.9784
After epoch 5: accuracy = 0.98315
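As an aside (my addition, not part of the answer above): if numerical stability is the concern, the usual trick is to subtract the per-row maximum rather than the minimum; softmax is shift-invariant, so the result is mathematically unchanged, but the exponentials stay bounded:
# softmax, stabilized with the row-wise max
exps = nd.exp(outputs - outputs.max(axis=1).reshape((-1, 1)))
exps = exps / exps.sum(axis=1).reshape((-1, 1))
# cross entropy
loss = nd.MakeLoss(-nd.log(exps.pick(labels)))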

No variation in accuracy and loss for the CNN?

I tried to classify images from 45 classes (700 images each) with a simple two-layer CNN: batch size 252, 30 epochs, learning rate 0.0001, image size 256 x 256 x 3. I tried increasing as well as decreasing the learning rate. The dataset was split in the ratio 0.8:0.1:0.1 for training:testing:validation. However, the accuracy remains unchanged and the loss is always zero. This is the architecture:
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//','//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 252, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//train//None_train_00000-of-00001.tfrecord'
datapath_validation = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//validation//None_validation_00000-of-00001.tfrecord'
datapath_test = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//test//None_test_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
features={
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
}
parsed_example = tf.parse_single_example(tfrecord, features)
image_de = tf.io.decode_raw(parsed_example['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
#Layer 1
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
#Layer 2
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#Placeholders
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
with tf.variable_scope(name) as scope:
# Shape of the filter-weights for the convolution
shape = [filter_size, filter_size, num_input_channels, num_filters]
# Create new weights (filters) with the given shape
weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
# Create new biases, one for each filter
biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
# TensorFlow operation for convolution
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
# Add the biases to the results of the convolution.
layer += biases
return layer, weights
def new_pool_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def new_relu_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.relu(input)
return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
with tf.variable_scope(name) as scope:
# Create new weights and biases.
weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
# Multiply the input and weights, and then add the bias-values.
layer = tf.matmul(input, weights) + biases
return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_pool1, num_input_channels=32, filter_size=3, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTENED LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_pool2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension = 1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_pred)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = FLAGS.learning_rate).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
dataset_validation = tf.data.TFRecordDataset(datapath_validation)
dataset_test = tf.data.TFRecordDataset(datapath_test)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
dataset_validation = dataset_validation.map(_extract_fn)
dataset_test = dataset_test.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
dataset_validation = dataset_validation.batch(FLAGS.batch_size)
iterator_validation = dataset_validation.make_initializable_iterator()
next_element_validation = iterator_validation.get_next()
dataset_test = dataset_test.batch(FLAGS.batch_size)
iterator_test = dataset_test.make_initializable_iterator()
next_element_test = iterator_test.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer = tf.summary.FileWriter("Training_FileWriter/")
"""
# create a summary for our cost and accuracy
train_cost_summary = tf.summary.scalar("train_cost", cost)
train_acc_summary = tf.summary.scalar("train_accuracy", accuracy)
test_cost_summary = tf.summary.scalar("test_cost", cost)
test_acc_summary = tf.summary.scalar("test_accuracy", accuracy)"""
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
sess.run(init_op)
sess.run(iterator_test.initializer)
# Add the model graph to TensorBoard
writer.add_graph(sess.graph)
# Loop over number of epochs
print('\nTraining')
for epoch in range(FLAGS.num_epochs):
sess.run(iterator_train.initializer)
sess.run(iterator_validation.initializer)
start_time = time.time()
"""train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0"""
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train = sess.run(next_element_train)
#_, loss_train, acc_train, _train_cost_summary, _train_acc_summary = sess.run([optimizer, cost, accuracy, train_cost_summary, train_acc_summary], feed_dict = {x: img_train, y: lbl_train})
_, loss_train, acc_train = sess.run([optimizer, cost, accuracy], feed_dict = {x: img_train, y: lbl_train})
#train_accuracy+=acc_train
#writer.add_summary(_train_cost_summary, epoch +1)
#writer.add_summary(_train_acc_summary, epoch +1)
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
#TRAINING
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\tAccuracy:")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
The output after training is as shown below:
Training
Epoch 1 completed : Time usage 122 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 2 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 3 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 4 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
The model is not learning. I have inspected the code several times and the logic seems to be OK. What could be the reason the metrics stay constant even after changing the learning rate and the number of epochs? I have also tried generating several datasets.
You have made a mistake in cross_entropy, where you are comparing the output with itself.
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_pred)
Try this
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
    # y_actual should be a one-hot labeled vector
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_actual)
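In the question's code the one-hot labels are already built as y_one_hot = tf.one_hot(y, 45), so (assuming the rest of the graph stays unchanged) the corrected block would be:
# Use Cross entropy cost function against the true one-hot labels
with tf.name_scope("cross_ent"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_one_hot)
    cost = tf.reduce_mean(cross_entropy)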