TensorFlow ValueError: Rank mismatch error - tensorflow

All, I am completely stuck due to an error in my code to classify Cats vs. Dogs using a Convolution network. I could use the high level libraries available these days, but for learning, I want to get this lower level working. The output is a binary classification of an image containing either a cat or a dog. I have scanned a number of Rank related threads, but unable to make out how to solve this error using sparse_softmax_cross_entropy_with_logits specifically.
If I change 2 lines; use softmax_cross_entropy_with_logits_v2() and uncomment labels = tf.argmax(y, 1), then it runs, but the Accuracy even on the train set, degrades rapidly (net diverges).
Any help would be much appreciated. Thanks.
The 2 lines I am not a 100% sure about are as follows.
Should the 1 here be n_outputs (which is 2)? (since binary but it does not seem right)
y = tf.placeholder(dtype=tf.int64, shape=[100, 1], name="y")
This is the line that throws the error: ValueError: Rank mismatch: Rank of labels (received 2) should equal rank of logits minus 1 (received 2).
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\
labels=y, logits=logits)
Full code (from the point of having data in hand) is as below; it is long-ish but simple. I have commented out the end since the error is thrown before it gets there. Error is at the end of it below.
#---Split data into training & test sets---
# Work the data for cats and dogs numpy arrays
# These numpy arrays were generated in previous data prep work
# Stack the numpy arrays for the inputs
X_cat_dog = np.concatenate((cats_1000_64_64_1, dogs_1000_64_64_1),
axis = 0)
X_cat_dog = X_cat_dog.reshape(-1, width*height) #Flatten
# Scikit Learn for min-max scaling of the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(np.array([0., 255.]).reshape(-1,1))
X_cat_dog_scaled = scaler.transform(X_cat_dog)
# Define the labels to be used: cats = 0, dogs = 1
y_cat_dog = np.concatenate((np.zeros((1000), dtype = np.int32),
np.ones((1000), dtype = np.int32)),
axis = 0)
# Scikit Learn for random splitting of the data
from sklearn.model_selection import train_test_split
# Random split of data into training (80%) and test (20%)
X_train, X_test, y_train, y_test = \
train_test_split(X_cat_dog_scaled, y_cat_dog, test_size=0.20,
random_state = RANDOM_SEED)
print('Train orig. shape:', X_train.shape, y_train.shape)
print('Test orig. shape:', X_test.shape, y_test.shape)
#Reshape into 4D
X_train = np.reshape(X_train, newshape=[X_train.shape[0], height, width, channels])
y_train = np.reshape(y_train, newshape=[y_train.shape[0], 1])
X_test = np.reshape(X_test, newshape=[X_test.shape[0], height, width, channels])
y_test = np.reshape(y_test, newshape=[y_test.shape[0], 1])
print('Train 4D shape:', X_train.shape, y_train.shape, type(X_train), type(y_train))
print('Test 4D shape:', X_test.shape, y_test.shape, type(X_test), type(y_test))
#---Define and run convolution net---
#Init
results = [] #Summary results
reset_graph() #Else upon rerun, error occurs
n_outputs = 2 #Binary; cat or dog
n_strides = [1,2,2] #Symmetric XY + same across conv & pool
n_conv_blocks = 1 #Number of convolution blocks
n_filters = [5, 10, 20] #Number of filters applied per layer
#Placeholders for batch training
X = tf.placeholder(dtype=tf.float64,
shape=[100, height, width, channels], name="X")
y = tf.placeholder(dtype=tf.int64, shape=[100, 1], name="y")
print('X.shape =', X.shape, tf.rank(X))
print('y.shape =', y.shape, tf.rank(y))
#Define hidden layers
with tf.name_scope("cnn"):
#Create number of convolution blocks required
for block in range(n_conv_blocks):
#Convolution layer
inputLayer = X
if (block>0):
inputLayer = pool
print('\nStride:', n_strides[block])
conv = tf.layers.conv2d(inputLayer,
filters = n_filters[block],
kernel_size = 1,
strides = n_strides[block],
activation = tf.nn.leaky_relu,
padding = "SAME")
print('Conv '+str(block)+'.shape =',
conv.get_shape().as_list())
#Pooling layer
pool = tf.nn.avg_pool(conv,
ksize = [1,2,2,1],
strides = [1,n_strides[block],n_strides[block],1],
padding = "SAME")
print('Pool '+str(block)+'.shape =', pool.shape)
pool_shape = pool.get_shape().as_list()
next_width = pool_shape[1]
next_height = pool_shape[2]
next_depth = pool_shape[3]
#Fully connected
flattened = tf.reshape(pool, [-1,
next_width * next_height * next_depth])
print('\nFlattened.shape =', flattened.shape)
hidden = tf.layers.dense(flattened,
next_width * next_height * next_depth,
name="hidden1",
activation=tf.nn.leaky_relu)
print('\nHidden.shape =', hidden.shape, tf.rank(hidden))
#Output
logits = tf.layers.dense(hidden, n_outputs, name="outputs")
print('\nLogits.shape =', logits.shape, tf.rank(logits))
#Define loss function
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\
labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
#Define optimizer used for reducing the loss; MomentumOptimizer
learning_rate = 0.01
momentum = 0.01
with tf.name_scope("train"):
optimizer = tf.train.MomentumOptimizer(learning_rate,
momentum)
training_op = optimizer.minimize(loss)
#Define performance measure; accuracy in this case
with tf.name_scope("eval"):
#labels = tf.argmax(y, 1)
labels = y
correct = tf.nn.in_top_k(logits, labels, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
#Define instantiator for TensorFlow variables
init = tf.global_variables_initializer()
#Carry out training in mini-batches
n_epochs = 1
batch_size = 100
with tf.Session() as sess:
#Instantiate variables
init.run()
#Loop over n_epochs
for epoch in range(n_epochs):
#Loop over batches
for iteration in range(y_train.shape[0] // batch_size):
X_batch = X_train[\
iteration*batch_size:(iteration + 1)*batch_size,:]
y_batch = y_train[\
iteration*batch_size:(iteration + 1)*batch_size]
print(y_batch.shape, type(y_batch))
# sess.run(training_op, feed_dict={X: X_batch,
# y: y_batch})
# #Measure performance
# acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
# acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
# if (epoch % 1 == 0):
# print(epoch,
# "Train Accuracy:",
# '{:0.1%}'.format(acc_train),
# "\tTest Accuracy:",
# '{:0.1%}'.format(acc_test))
# results.append([epoch, acc_train, acc_test])
Error is as follows.
X.shape = (100, 64, 64, 1) Tensor("Rank:0", shape=(), dtype=int32)
y.shape = (100, 1) Tensor("Rank_1:0", shape=(), dtype=int32)
Stride: 1
Conv 0.shape = [100, 64, 64, 5]
Pool 0.shape = (100, 64, 64, 5)
Flattened.shape = (100, 20480)
Hidden.shape = (100, 20480) Tensor("cnn/Rank:0", shape=(), dtype=int32)
Logits.shape = (100, 2) Tensor("cnn/Rank_1:0", shape=(), dtype=int32)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-25-7961eb9a772c> in <module>()
58 #Define loss function
59 with tf.name_scope("loss"):
---> 60 xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=y, logits=logits)
61 loss = tf.reduce_mean(xentropy, name="loss")
62
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits(_sentinel, labels, logits, name)
2645 raise ValueError("Rank mismatch: Rank of labels (received %s) should "
2646 "equal rank of logits minus 1 (received %s)." %
-> 2647 (labels_static_shape.ndims, logits.get_shape().ndims))
2648 if (static_shapes_fully_defined and
2649 labels_static_shape != logits.get_shape()[:-1]):
ValueError: Rank mismatch: Rank of labels (received 2) should equal rank of logits minus 1 (received 2).

OK, I figured it out. I adjusted 2 lines as follows.
I dropped the extra dimension from shape of y as follows.
y = tf.placeholder(dtype=tf.int64, shape=[None], name="y")
All references to y_batch after its definition, were replaced with y_batch.reshape(-1). This was needed to reduce the rank of y_batch to get rid of the error.
Rest remained unchanged. Now I have a new problem, the accuracy remains low, but at least now its behaving itself (and not going to zero). Playtime!
0 Train Accuracy: 61.0% Test Accuracy: 48.5%
1 Train Accuracy: 60.0% Test Accuracy: 48.8%
2 Train Accuracy: 61.0% Test Accuracy: 49.5%
3 Train Accuracy: 65.0% Test Accuracy: 50.2%
4 Train Accuracy: 65.0% Test Accuracy: 51.0%
5 Train Accuracy: 64.0% Test Accuracy: 51.0%
6 Train Accuracy: 65.0% Test Accuracy: 51.5%
7 Train Accuracy: 66.0% Test Accuracy: 51.0%
8 Train Accuracy: 64.0% Test Accuracy: 51.2%
9 Train Accuracy: 63.0% Test Accuracy: 52.5%
10 Train Accuracy: 62.0% Test Accuracy: 52.0%
11 Train Accuracy: 62.0% Test Accuracy: 52.0%
12 Train Accuracy: 63.0% Test Accuracy: 53.5%
13 Train Accuracy: 63.0% Test Accuracy: 53.5%
14 Train Accuracy: 63.0% Test Accuracy: 54.0%
15 Train Accuracy: 63.0% Test Accuracy: 53.5%
16 Train Accuracy: 64.0% Test Accuracy: 53.5%
17 Train Accuracy: 64.0% Test Accuracy: 53.8%
18 Train Accuracy: 65.0% Test Accuracy: 53.8%
19 Train Accuracy: 65.0% Test Accuracy: 53.8%

Related

Tensorflow with custom loss containing multiple inputs - Graph disconnected error

I have a CNN output a scalar, this output is concatenated with the output of an MLP and then fed to another dense layer. I get a Graph Disconnected error
Please advise as to how to fix this. Thanks in advance.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, concatenate, Input
import tensorflow as tf
tf.keras.backend.clear_session()
#----custom function
def custom_loss(ytrue, ypred):
loss = tf.math.log(1. + ytrue) - tf.math.log(1. + ypred)
loss = tf.math.square(loss)
loss = tf.math.reduce_mean(loss)
return loss
#------------------
cnnin = Input(shape=(10, 10, 1))
x = Conv2D(8, 4)(cnnin)
x = Conv2D(16, 4)(x)
x = Conv2D(32, 2)(x)
x = Conv2D(64, 2)(x)
x = Flatten()(x)
x = Dense(4)(x)
x = Dense(4, activation="relu")(x)
cnnout = Dense(1, activation="linear")(x)
cnnmodel= Model(cnnin, cnnout, name="cnn_model")
yt = Input(shape=(2, )) #---dummy input
#---mlp start
mlpin = Input(shape=(2, ), name="mlp_input")
z = Dense(4, activation="sigmoid")(mlpin)
z = Dense(4, activation = "softmax")(z)
mlpout = Dense(1, activation="linear")(z)
mlpmodel = Model(mlpin, mlpout, name="mlp_model")
#----concatenate
combinedout = concatenate([mlpmodel.output, cnnmodel.output ])
x = Dense(4, activation="sigmoid")(combinedout)
finalout = Dense(2, activation="linear")(x)
model = Model( [mlpin, cnnin], finalout)
model.add_loss(custom_loss(yt, finalout))
model.compile(optimizer='adam', learning_rate=1e-3, initialization="glorotnorm",
loss=None)
Graph disconnected: cannot obtain value for tensor Tensor("input_8:0", shape=(None, 2), dtype=float32) at layer "input_8". The following previous layers were accessed without issue: ['input_7', 'conv2d_12', 'conv2d_13', 'conv2d_14', 'conv2d_15', 'flatten_3', 'mlp_input', 'dense_24', 'dense_27', 'dense_25', 'dense_28', 'dense_29', 'dense_26', 'concatenate_3', 'dense_30', 'dense_31']
You can customize what happens in Model.fit based on https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit
We create a new class that subclasses keras.Model.
We just override the method train_step(self, data).
We return a dictionary mapping metric names (including the loss) to
their current value.
For example with your models:
loss_tracker = tf.keras.metrics.Mean(name = "custom_loss")
class TestModel(tf.keras.Model):
def __init__(self, model1):
super(TestModel, self).__init__()
self.model1 = model1
def compile(self, optimizer):
super(TestModel, self).compile()
self.optimizer = optimizer
def train_step(self, data):
x, y = data
with tf.GradientTape() as tape:
ypred = self.model1([x], training = True)
loss_value = custom_loss(y, ypred)
# Compute gradients
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss_value, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
loss_tracker.update_state(loss_value)
return {"loss": loss_tracker.result()}
import numpy as np
x = np.random.rand(6, 10,10,1)
x2 = np.random.rand(6,2)
y = tf.ones((6,2))
model = Model( [mlpin, cnnin], finalout)
trainable_model = TestModel(model)
trainable_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0001))
trainable_model.fit(x=(x2, x), y = y, epochs=5)
Gives the following output:
Epoch 1/5
1/1 [==============================] - 0s 382ms/step - loss: 0.2641
Epoch 2/5
1/1 [==============================] - 0s 4ms/step - loss: 0.2640
Epoch 3/5
1/1 [==============================] - 0s 6ms/step - loss: 0.2638
Epoch 4/5
1/1 [==============================] - 0s 7ms/step - loss: 0.2635
Epoch 5/5
1/1 [==============================] - 0s 6ms/step - loss: 0.2632
<tensorflow.python.keras.callbacks.History at 0x14c69572688>

eager mode and keras.fit have different results

I am trying to convert model.fit() in Keras to the eager mode training. The model is an autoencoder. It has one encoder and two decoders. The decoders have different loss functions. The losses for decoders in eager model and model.fit are the same. I tried to set everything as the model.fit(). But the losses are different. I really appreciate help me out.
The link for google colab: https://colab.research.google.com/drive/1XNOwJ9oVgs1z9qqXIs_ldnKuSm3Dn2Ud?usp=sharing
In the following, the definition and training of the model are shown. I use model.fit() for training. Also, in the end, the output is shown, which shows the values for losses.
def fit_ae (x_unlab, p_m, alpha, parameters):
# Parameters
_, dim = x_unlab.shape
epochs = parameters['epochs']
batch_size = parameters['batch_size']
# Build model
inputs = contrib_layers.Input(shape=(dim,))
# Encoder
h = contrib_layers.Dense(int(256), activation='relu', name='encoder1')(inputs)
h = contrib_layers.Dense(int(128), activation='relu', name='encoder2')(h)
h = contrib_layers.Dense(int(26), activation='relu', name='encoder3')(h)
# Mask estimator
output_1 = contrib_layers.Dense(dim, activation='sigmoid', name = 'mask')(h)
# Feature estimator
output_2 = contrib_layers.Dense(dim, activation='sigmoid', name = 'feature')(h)
#Projection Network
model = Model(inputs = inputs, outputs = [output_1, output_2])
model.compile(optimizer='rmsprop',
loss={'mask': 'binary_crossentropy',
'feature': 'mean_squared_error'},
loss_weights={'mask':1, 'feature':alpha})
m_unlab = mask_generator(p_m, x_unlab)
m_label, x_tilde = pretext_generator(m_unlab, x_unlab)
# Fit model on unlabeled data
model.fit(x_tilde, {'mask': m_label, 'feature': x_unlab}, epochs = epochs, batch_size= batch_size)
########### OUTPUT
Epoch 1/15
4/4 [==============================] - 1s 32ms/step - loss: 1.0894 - mask_loss: 0.6560 - feature_loss: 0.2167
Epoch 2/15
4/4 [==============================] - 0s 23ms/step - loss: 0.6923 - mask_loss: 0.4336 - feature_loss: 0.1293
Epoch 3/15
4/4 [==============================] - 0s 26ms/step - loss: 0.4720 - mask_loss: 0.3022 - feature_loss: 0.0849
Epoch 4/15
4/4 [==============================] - 0s 23ms/step - loss: 0.4054 - mask_loss: 0.2581 - feature_loss: 0.0736
In the following code, I implemented the above code in eager mode. I set all optimizer and loss functions same as the above code. Data are the same for training both model.
###################################################### MODEL AUTOENCODER ============================================
def eager_ae(x_unlab,p_m,alpha,parameters):
# import pdb; pdb.set_trace()
_, dim = x_unlab.shape
epochs = parameters['epochs']
batch_size = parameters['batch_size']
E = keras.Sequential([
Input(shape=[dim,]),
Dense(256,activation='relu'),
Dense(128,activation='relu'),
Dense(26,activation='relu'),
])
# Mask estimator
output_1 = keras.Sequential([
Dense(dim,activation='sigmoid'),
])
# Feature estimator
output_2 = keras.Sequential([
Dense(dim,activation='sigmoid'),
])
optimizer = tf.keras.optimizers.RMSprop()
loss_mask = tf.keras.losses.BinaryCrossentropy()
loss_feature = tf.keras.losses.MeanSquaredError()
# Generate corrupted samples
m_unlab = mask_generator(p_m, x_unlab)
m_label, x_tilde = pretext_generator(m_unlab, x_unlab)
for epoch in range(epochs):
loss_metric = tf.keras.metrics.Mean(name='train_loss')
len_batch = range(int(x_unlab.shape[0]/batch_size))
for i in len_batch:
samples = x_tilde[i*batch_size:(i+1)*batch_size]
mask = m_label[i*batch_size:(i+1)*batch_size]
# train_step(samples,tgt)
with tf.GradientTape() as tape:
latent = E(samples, training=True)
out_mask = output_1(latent)
out_feat = output_2(latent)
# import pdb; pdb.set_trace()
lm = loss_mask(out_mask,tf.Variable(mask,dtype=tf.float32))
lf = loss_feature(out_feat,tf.Variable(samples,dtype=tf.float32))
pred_loss = lm + alpha*lf
trainable_vars = E.trainable_weights+output_1.trainable_weights+output_2.trainable_weights
grads = tape.gradient(pred_loss, trainable_vars)
optimizer.apply_gradients(zip(grads, trainable_vars))
loss_metric.update_state(pred_loss)
print(f'Epoch {epoch}, Loss {loss_metric.result()}')
return E
############# OUTPUT
Epoch 0, Loss 7.902271747589111
Epoch 1, Loss 5.336598873138428
Epoch 2, Loss 2.880791664123535
Epoch 3, Loss 1.9296690225601196
Epoch 4, Loss 1.6377944946289062
Epoch 5, Loss 1.5342860221862793
Epoch 6, Loss 1.5015968084335327
Epoch 7, Loss 1.4912563562393188
The total loss in the first code is less than zero (≈0.25), while the total loss in the second code is more than 1 (≈1.3). I can not find the issue in my second implementation (the second code).

Stateful LSTM VAE: Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [batch_size, latent_dim]

I am solving a Timeseries problem using LSTM VAE(Variational auto-encoder), I have built my VAE model as below
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
class VAE:
def __init__(self,
hidden_layer_units,
hidden_layer_leakyrelu_alphas,
hidden_layer_dropout_rates,
batch_size,
time_steps,
num_features,
is_stateful_learning):
self.hidden_layer_units = hidden_layer_units
self.hidden_layer_leakyrelu_alphas = hidden_layer_leakyrelu_alphas
self.hidden_layer_dropout_rates = hidden_layer_dropout_rates
self.encoder_num_layers = 0
self.latent_space_dim = 0
vae_total_layers = len(hidden_layer_units)
if 0 < vae_total_layers:
self.encoder_num_layers = int((vae_total_layers - 1) / 2)
self.latent_space_dim = self.hidden_layer_units[self.encoder_num_layers]
self.batch_size = batch_size
self.time_steps = time_steps
self.num_features = num_features
self.is_stateful_learning = is_stateful_learning
self.encoder = None
self.decoder = None
self.model = None
self.model_input = None
self.model_output = None
self.mu = None
self.log_variance = None
self.kulback_coef = 0.0001
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss, self._calculate_kl_loss])
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_encoder(self):
encoder_input = self._add_encoder_input()
lstm_layers = self._add_encoder_lstm_layers(encoder_input)
bottleneck = self._add_bottleneck(lstm_layers)
self.model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
repeater_layer = self._add_repeater_layer(decoder_input)
lstm_layer = self._add_decoder_lstm_layer(repeater_layer)
decoder_output = self._add_decoder_output(lstm_layer)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _build_autoencoder(self):
model_input = self.model_input
encoder_output = self.encoder(model_input)
model_output = self.decoder(encoder_output)
self.model_output = model_output
self.model = Model(model_input, model_output, name="autoencoder")
def _add_encoder_input(self):
if self.is_stateful_learning:
x = Input(batch_shape=(self.batch_size, self.time_steps, self.num_features), name="encoder_input")
else:
x = Input(shape=(self.time_steps, self.num_features), name="encoder_input")
return x
def _add_encoder_lstm_layers(self, encoder_input):
""" Create all lstm layers in encoder."""
x = encoder_input
for layer_index, units in enumerate(self.hidden_layer_units[:self.encoder_num_layers]):
lstm_params = {}
if layer_index < self.encoder_num_layers - 1:
lstm_params["return_sequences"] = True
if self.is_stateful_learning:
lstm_params["stateful"] = True
x = LSTM(units=units, **lstm_params)(x)
x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_index])(x)
x = Dropout(rate=self.hidden_layer_dropout_rates[layer_index])(x)
return x
def _add_bottleneck(self, x):
""" add bottleneck with Guassian sampling (Dense layer)."""
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim, name="log_variance")(x)
x = Lambda(self.sample_point_from_normal_distribution, name="encoder_output")([self.mu, self.log_variance])
return x
def sample_point_from_normal_distribution(self, args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
def _add_decoder_input(self):
if self.is_stateful_learning:
x = Input(batch_shape=(self.batch_size, self.latent_space_dim), name="decoder_input")
else:
x = Input(shape=(self.latent_space_dim), name="decoder_input")
return x
def _add_repeater_layer(self, decoder_input):
return RepeatVector(self.time_steps)(decoder_input)
def _add_decoder_lstm_layer(self, repeater_layer):
x = repeater_layer
for layer_index, units in enumerate(self.hidden_layer_units[self.encoder_num_layers + 1:]):
lstm_params = {}
if self.is_stateful_learning:
# stateful build
lstm_params = {'stateful': True, 'return_sequences': True}
else:
lstm_params["return_sequences"] = True
layer_no = layer_index + self.encoder_num_layers + 1
x = LSTM(units=units, **lstm_params)(x)
x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_no])(x)
x = Dropout(rate=self.hidden_layer_dropout_rates[layer_no])(x)
return x
def _add_decoder_output(self, lstm_layer):
return TimeDistributed(Dense(1))(lstm_layer)
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = self._calculate_kl_loss(y_target, y_predicted)
combined_loss = reconstruction_loss + (self.kulback_coef * kl_loss)
return combined_loss
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=1)
return reconstruction_loss
def _calculate_kl_loss(self, y_target, y_predicted):
kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) - K.exp(self.log_variance), axis=1)
return kl_loss
# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
vae = VAE(
hidden_layer_units=lstm_layer_units,
hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
hidden_layer_dropout_rates=dropout_layer_rates,
batch_size=number_of_sequences,
time_steps=time_steps,
num_features=data_dim,
is_stateful_learning=is_stateful_learning
)
vae.compile(learning_rate)
vae.summary()
return vae.model
Model training block looks as below
# configuration
nn_lstm_layer_units = [160, 3, 160]
nn_leakyrelu_layer_alphas = [0.0, 0.0, 0.0]
nn_dropout_layer_rates = [0.3, 0.0, 0.3]
batch_size = 96
win_length = 64
num_features = 6 # You can use single variate Timeseries data as well, num_features = 1
epochs = 782
learning_rate = 0.0001
want_stateful_learning = True
# Build LSTM VAE model
model = build_lstm_neural_network(nn_lstm_layer_units, nn_leakyrelu_layer_alphas, nn_dropout_layer_rates, batch_size,
win_length, num_features, want_stateful_learning)
TIME_STEPS = win_length
# Generated training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
output = []
for i in range(len(values) - time_steps + 1):
output.append(values[i: (i + time_steps)])
return np.stack(output)
x_train = create_sequences(x_train)
x_val = create_sequences(x_val)
callbacks = []
unfit_train_record_count = 0
unfit_val_record_count = 0
if want_stateful_learning:
# stateful learning
# adjust train data size(should be in multiples of batch size)
unfit_train_record_count = len(x_train) % batch_size
unfit_val_record_count = len(x_val) % batch_size
# Reset states of the stateful model on epoch end
stateful_model_reset_states = LambdaCallback(on_epoch_end=lambda batch, logs: model.reset_states())
callbacks.append(stateful_model_reset_states)
early_stopping = EarlyStopping(monitor=monitor, patience=patience)
callbacks.append(early_stopping)
# Model traning
history = model.fit(x=x_train[unfit_train_record_count:], y=x_train[unfit_train_record_count:, :, [0]], validation_data=(x_val[unfit_val_record_count:], x_val[unfit_val_record_count:, :, [0]]), batch_size=batch_size, epochs=epochs, shuffle=False, callbacks=callbacks)
The stateless mode of the model is working as expected but the stateful mode is throwing an error as below-
1632/1632 [==============================] - ETA: 0s - loss: 0.2447 - _calculate_reconstruction_loss: 0.2447 - _calculate_kl_loss: 0.0326
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
[[{{node decoder_input}}]]
[[metrics/_calculate_reconstruction_loss/Identity/_229]]
(1) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
[[{{node decoder_input}}]]
Environment used is as
Python-3.8.12,
Tensorflow-gpu: 2.5,
cudnn: 8.2.1.32
I am not clear why the stateful model run 1 Epoch for training data, but as soon as it starts to process the validation data, it throws the error.
I had the same experiences with dataset and loss function that not suitable, I try to simulate again it possible no loss value change, no loss as nan, error when validation.
That is possible no value, no match or not update neuron, you can use Tensorflow 2.x is a lot moire easier.
This is no match validation: Working on training but results in errors when validation. ( one possible )
Epoch 1/100
2022-01-23 21:04:59.846791: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - ETA: 0s - loss: 3.1866 - accuracy: 0.0000e+00Traceback (most recent call last):
Another possible is loss Fn no match: It is possible they are not update the neurons
Epoch 1/100
2022-01-23 21:08:23.330068: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - 3s 3s/step - loss: 13.7138 - accuracy: 0.2000 - val_loss: 8.2133 - val_accuracy: 0.0000e+00
Epoch 2/100
1/1 [==============================] - 0s 65ms/step - loss: 7.7745 - accuracy: 0.0000e+00 - val_loss: 8.0456 - val_accuracy: 0.0000e+00
I solved the problem, by changing the loss calculation logic, instead of defining the functions to calculate reconstruction and KL loss in the VAE class, I moved the loss calculation part outside the VAE class as below
# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
vae = VAE(
hidden_layer_units=lstm_layer_units,
hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
hidden_layer_dropout_rates=dropout_layer_rates,
batch_size=number_of_sequences,
time_steps=time_steps,
num_features=data_dim,
is_stateful_learning=is_stateful_learning
)
# Add reconstruction loss
error = vae.model_input - vae.model_output
reconstruction_loss = K.mean(K.square(error))
vae.model.add_loss(reconstruction_loss)
vae.model.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')
# Add KL loss
kl_loss = kl_beta * K.mean(-0.5 * K.sum(1 + vae.log_variance - K.square(vae.mu) - K.exp(vae.log_variance), axis = 1), axis=0)
model.add_loss(kl_loss)
model.add_metric(kl_loss, name='kl_loss', aggregation='mean')
optimizer = Adam(learning_rate=vae.learning_rate, clipvalue=vae.clipvalue)
vae.model.compile(loss=None, optimizer=optimizer)
vae.summary()
return vae.model

No variation in accuracy and loss for the CNN?

I tried to classify images of 45 classes of 700 images each and perform simple CNN classification with two layers: of batch size: 252, epoch: 30, learning rate: 0.0001, Image size: 256 by 256 by3. I tried to increase as well as decrease the learning rate. Also the data set was split in the ratio 08:0.1:0.1 for training:testing:validation. However the accuracy and loss remains unchanged the loss is always zero. This is the architecture:
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//','//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 252, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//train//None_train_00000-of-00001.tfrecord'
datapath_validation = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//validation//None_validation_00000-of-00001.tfrecord'
datapath_test = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//test//None_test_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
features={
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
}
parsed_example = tf.parse_single_example(tfrecord, features)
image_de = tf.io.decode_raw(parsed_example['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
#Layer 1
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
#Layer 2
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#Placeholders
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
with tf.variable_scope(name) as scope:
# Shape of the filter-weights for the convolution
shape = [filter_size, filter_size, num_input_channels, num_filters]
# Create new weights (filters) with the given shape
weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
# Create new biases, one for each filter
biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
# TensorFlow operation for convolution
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
# Add the biases to the results of the convolution.
layer += biases
return layer, weights
def new_pool_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def new_relu_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.relu(input)
return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
with tf.variable_scope(name) as scope:
# Create new weights and biases.
weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
# Multiply the input and weights, and then add the bias-values.
layer = tf.matmul(input, weights) + biases
return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_pool1, num_input_channels=32, filter_size=3, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTENED LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_pool2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension = 1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_pred)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = FLAGS.learning_rate).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
dataset_validation = tf.data.TFRecordDataset(datapath_validation)
dataset_test = tf.data.TFRecordDataset(datapath_test)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
dataset_validation = dataset_validation.map(_extract_fn)
dataset_test = dataset_test.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
dataset_validation = dataset_validation.batch(FLAGS.batch_size)
iterator_validation = dataset_validation.make_initializable_iterator()
next_element_validation = iterator_validation.get_next()
dataset_test = dataset_test.batch(FLAGS.batch_size)
iterator_test = dataset_test.make_initializable_iterator()
next_element_test = iterator_test.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer = tf.summary.FileWriter("Training_FileWriter/")
"""
# create a summary for our cost and accuracy
train_cost_summary = tf.summary.scalar("train_cost", cost)
train_acc_summary = tf.summary.scalar("train_accuracy", accuracy)
test_cost_summary = tf.summary.scalar("test_cost", cost)
test_acc_summary = tf.summary.scalar("test_accuracy", accuracy)"""
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
sess.run(init_op)
sess.run(iterator_test.initializer)
# Add the model graph to TensorBoard
writer.add_graph(sess.graph)
# Loop over number of epochs
print('\nTraining')
for epoch in range(FLAGS.num_epochs):
sess.run(iterator_train.initializer)
sess.run(iterator_validation.initializer)
start_time = time.time()
"""train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0"""
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train = sess.run(next_element_train)
#_, loss_train, acc_train, _train_cost_summary, _train_acc_summary = sess.run([optimizer, cost, accuracy, train_cost_summary, train_acc_summary], feed_dict = {x: img_train, y: lbl_train})
_, loss_train, acc_train = sess.run([optimizer, cost, accuracy], feed_dict = {x: img_train, y: lbl_train})
#train_accuracy+=acc_train
#writer.add_summary(_train_cost_summary, epoch +1)
#writer.add_summary(_train_acc_summary, epoch +1)
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
#TRAINING
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\tAccuracy:")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
The output after training is as shown below:
Training
Epoch 1 completed : Time usage 122 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 2 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 3 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 4 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
There is no learning of the model. I have inspected several times, the logic seems to be ok. What could be the probable reason why this is constant even after changing the learning rate, epoch and also i have tried to generate several datasets.
You have made a mistake in cross_entropy, where you are comparing the output with itself.
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_pred)
Try this
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
# y_actual should be one-hot labeled vector
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_actual)

Tensor Flow - low accuracy on CNN Mnist Data set / How to batch accuracy calculations

The code is in python 3.5.2 with Tensor flow. The neural network returns an accuracy of between .10 and 5.00, with the higher value tending to be the accuracy of the training data by a factor of roughly 6. I cannot tell whether the neural network is legitimately doing worse than random guessing or if the accuracy code i am using has a serious fault i cannot see.
The neural network consists of 5 layers:
input
conv1 (with max pooling relu and dropout)
conv2 (with max pooling relu and dropout)
fully connected (with relu)
output
uses default Adam optimizer
I feel very suspicious of my accuracy calculations as i made them differently than what i have seen due to RAM constraints. The accuracy calculation does both the accuracy of the train and test data.
acc_total = 0
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
for _ in range(int(mnist.test.num_examples/batch_size)):
test_x, test_y = mnist.test.next_batch(batch_size)
acc = accuracy.eval(feed_dict={x: test_x, y: test_y})
acc_total += acc
print('Accuracy:',acc_total*batch_size/float(mnist.test.num_examples),end='\r')
print('Epoch', epoch, 'current test set accuracy : ',acc_total*batch_size/float(mnist.test.num_examples))
acc_total=0
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
for _ in range(int(mnist.train.num_examples/batch_size)):
train_x, train_y = mnist.train.next_batch(batch_size)
acc = accuracy.eval(feed_dict={x: train_x, y: train_y})
acc_total += acc
print('Accuracy:',acc_total*batch_size/float(mnist.train.num_examples),end='\r')
print('Epoch', epoch, 'current train set accuracy : ',acc_total*batch_size/float(mnist.test.num_examples))
This is a sample of the outputs:
Epoch 0 completed out of 20 loss: 10333239.3396 83.29 ts 429
Epoch 0 current test set accuracy : 0.7072
Epoch 0 current train set accuracy : 3.8039
Epoch 1 completed out of 20 loss: 1831489.40747 39.24 ts 858
Epoch 1 current test set accuracy : 0.7765
Epoch 1 current train set accuracy : 4.2239
Epoch 2 completed out of 20 loss: 1010191.40466 25.89 ts 1287
Epoch 2 current test set accuracy : 0.8069
Epoch 2 current train set accuracy : 4.3898
Epoch 3 completed out of 20 loss: 631960.809082 0.267 ts 1716
Epoch 3 current test set accuracy : 0.8277
Epoch 3 current train set accuracy : 4.4955
Epoch 4 completed out of 20 loss: 439149.724823 2.001 ts 2145
Epoch 4 current test set accuracy : 0.8374
Epoch 4 current train set accuracy : 4.5674
The full code is as follows (sorry about the length i added a lot of comments for my own use ):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
#Imported Data set
mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)
#ammount of output classes
n_classes = 10
#ammount of examples processed at once
#memory impact of ~500MB for 128 with more on eval runs
batch_size = 128
#Times to cycle through the entire imput data set
epoch_amm =20
#Input and outputs placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32)
#Dropout is 1-keeprate; fc- fully conected layer dropout;conv conv layer droupout
keep_rate_fc=.5
keep_rate_conv=.75
keep_prob=tf.placeholder(tf.float32)
#Regularization paramaters
Regularization_active= False #True and False MUST be capitalized
Lambda= 1.0 #'weight' of the weights on the loss function
# counter for total steps taken by trainer
training_steps = 1
#Learning Rate For Network
base_Rate = .03
decay_steps = 64
decay_rate = .96
Staircase = True
Learning_Rate = tf.train.exponential_decay(base_Rate, training_steps, decay_steps, decay_rate, staircase='Staircase', name='Exp_decay' )
#Convolution Function returns neuronns that act on a section of prev. layer
def conv2d(x,W):
return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
#Pooling function returns max value in 2 by 2 sections
def maxpool2d(x):
return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
def relu(x):
return tf.nn.relu(x,'relu')
def add(x, b):
return tf.add(x,b)
#'Main' method, contains the Neural Network
def convolutional_neural_network(x):
weights = {'W_conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_fc':tf.Variable(tf.random_normal([7*7*64,1024])),
'W_out':tf.Variable(tf.random_normal([1024,n_classes]))}
biases = {'B_conv1':tf.Variable(tf.random_normal([32])),
'B_conv2':tf.Variable(tf.random_normal([64])),
'B_fc':tf.Variable(tf.random_normal([1024])),
'B_out':tf.Variable(tf.random_normal([n_classes]))}
# Input layer
x = tf.reshape(x, shape=[-1,28,28,1])
#first layer. pass inputs through conv2d and save as conv1 then apply maxpool2d
conv1 = conv2d(x,weights['W_conv1'])
conv1 = add(conv1,biases['B_conv1'])
conv1 = relu(conv1)
conv1 = maxpool2d(conv1)
conv1 = tf.nn.dropout(conv1,keep_rate_conv)
#second layer does same as first layer
conv2 = conv2d(conv1,weights['W_conv2'])
conv2 = add(conv2,biases['B_conv2'])
conv2 = relu(conv2)
conv2 = maxpool2d(conv2)
conv2 = tf.nn.dropout(conv2,keep_rate_conv)
#3rd layer fully connected
fc = tf.reshape(conv2,[-1,7*7*64])
fc = tf.matmul(fc,weights['W_fc'])
fc = add(fc,biases['B_fc'])
fc = relu(fc)
fc = tf.nn.dropout(fc,keep_rate_fc)
#4th and final layer
output = tf.matmul(fc,weights['W_out'])
output = add(output,biases['B_out'])
return output
#Trains The neural Network
def train_neural_network(x):
training_steps = 0
#Initiate The Network
prediction = convolutional_neural_network(x)
#Define the Cost and Cost function
#tf.reduce_mean averages the values of a tensor into one value
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(prediction,y) )
#Apply Regularization if active
#if Regularization_active :
# print('DEBUG!! LINE 84 REGULARIZATION ACTIVE')
# cost = (tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(prediction,y))+
# (Lambda*(tf.nn.l2_loss(weight['W_conv1'])+
# tf.nn.l2_loss(weight['W_conv2'])+
# tf.nn.l2_loss(weight['W_fc'])+
# tf.nn.l2_loss(weight['W_out'])+
# tf.nn.l2_loss(biases['B_conv1'])+
# tf.nn.l2_loss(biases['B_conv2'])+
# tf.nn.l2_loss(biases['B_fc'])+
# tf.nn.l2_loss(biases['B_out']))))
#Optimizer + Learning_Rate passthrough
optimizer = tf.train.AdamOptimizer().minimize(cost)
#Get Epoch Ammount
hm_epochs = epoch_amm
#Starts C++ Training session
print('Session Started')
with tf.Session() as sess:
#Initiate all Variables
sess.run(tf.global_variables_initializer())
#Begin Logs
summary_writer = tf.summary.FileWriter('/tmp/logs',sess.graph)
#Start Training
for epoch in range(hm_epochs):
epoch_loss = 0
for count in range(int(mnist.train.num_examples/batch_size)):
training_steps = (training_steps+1)
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
count, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'current epoch loss', epoch_loss, 'batch loss', c,'ts',training_steps,' ', end='\r')
#Log the loss per epoch
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss,' ')
acc_total = 0
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
for _ in range(int(mnist.test.num_examples/batch_size)):
test_x, test_y = mnist.test.next_batch(batch_size)
acc = accuracy.eval(feed_dict={x: test_x, y: test_y})
acc_total += acc
print('Accuracy:',acc_total*batch_size/float(mnist.test.num_examples),end='\r')
print('Epoch', epoch, 'current test set accuracy : ',acc_total*batch_size/float(mnist.test.num_examples))
acc_total=0
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
for _ in range(int(mnist.train.num_examples/batch_size)):
train_x, train_y = mnist.train.next_batch(batch_size)
acc = accuracy.eval(feed_dict={x: train_x, y: train_y})
acc_total += acc
print('Accuracy:',acc_total*batch_size/float(mnist.train.num_examples),end='\r')
print('Epoch', epoch, 'current train set accuracy : ',acc_total*batch_size/float(mnist.test.num_examples))
print('Complete')
sess.close()
#Run the Neural Network
train_neural_network(x)
The CNN had low results because of 4 reasons:
Improper (Lack of) feeding of dropout
-the keep rate was not being fed into accuracy.eval(feed_dict={x: test_x, y: test_y}) causing the network to underpreform in its accuracy evaluations
Poor Initialization of weights
RELU neuron work significantly better with weights closer to zero than normal distribution.
far to high learning rate
Learning rate of .03 even with decay was far far to high and stoped it from training effectively
errors in accuracy function
The accuracy function of the training data was receiving the size of the data set form mnist.test.num_examples instead of the proper mnist.train.num_examples and caused nonsensical values of accuracy in excess of 100%