Related
How should f1-score be evaluated during a custom training and evaluating loop in TensorFlow in a binary classification task?
I have checked some online sources. The solution using tfa simply does not work, some self-written f1score functions cannot integrate into the custom training loop. Specifically, in order to follow the same usage pattern as other evaluation metrics, such as keras.metrics.BinaryAccuracy, keras.metrics.AUC, I think I should extend on the tf.keras.metrics.Metric class, but I am not capable of writing such an evaluation function myself.
# Build the network: a 784-feature input, two 64-unit ReLU hidden layers,
# and a 10-unit output layer without activation (raw logits).
inputs = keras.Input(shape=(784,), name="digits")
x = inputs
for dense_name in ("dense_1", "dense_2"):
    x = layers.Dense(64, activation="relu", name=dense_name)(x)
outputs = layers.Dense(10, name="predictions")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Plain SGD drives the weight updates.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

# The loss consumes integer labels and raw logits (from_logits=True).
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# One accuracy metric per phase so training and validation never share state.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()
import time
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Training pass: one optimizer step per batch.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record the forward pass so the tape can differentiate the loss.
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)

        trainable = model.trainable_weights
        grads = tape.gradient(loss_value, trainable)
        optimizer.apply_gradients(zip(grads, trainable))

        # Accumulate this batch into the running training accuracy.
        train_acc_metric.update_state(y_batch_train, logits)

        # Progress report every 200 batches.
        if step % 200 == 0:
            print("Training loss (for one batch) at step %d: %.4f" % (step, float(loss_value)))
            print("Seen so far: %d samples" % ((step + 1) * batch_size))

    # Report the epoch-level training accuracy, then clear the accumulator
    # so the next epoch starts from zero.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))
    train_acc_metric.reset_states()

    # Validation pass: forward only, no gradient updates.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        val_acc_metric.update_state(y_batch_val, val_logits)

    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))
Specifically, I wonder how I can calculate f1-score in exactly the same way as the train_acc_metric and val_acc_metric in the following code segment. (i.e. call update_state, result, reset_state at exactly the same location as train_acc_metric and val_acc_metric)
you can use this code
# tf.compat.v1.metrics.precision/recall each return a (value_tensor, update_op)
# pair, so the pair must be unpacked before doing arithmetic on the value.
# Each metric is created once; run both update ops on every batch (and
# initialize local variables) before reading `f1`.
precision_value, precision_update = tf.compat.v1.metrics.precision(labels, predictions)
recall_value, recall_update = tf.compat.v1.metrics.recall(labels, predictions)
# divide_no_nan returns 0 instead of NaN while precision + recall is still 0.
f1 = tf.math.divide_no_nan(2 * precision_value * recall_value,
                           precision_value + recall_value)
or you can try this one
def f1_m(y_true, y_pred):
    """Return the F1 score for `y_pred` against `y_true`.

    Combines the results of `precision_m` and `recall_m` as their harmonic
    mean; `K.epsilon()` is added to the denominator so the division stays
    defined when both are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio
or this one
# Compile with Adam and a logits-based sparse cross-entropy loss.
# The metrics list is now properly closed with "]" (it was left open before,
# which is a syntax error).
# NOTE(review): the loss expects integer labels and raw logits, whereas
# Accuracy/Precision/Recall/F1Score compare y_true and y_pred directly --
# confirm the label/prediction format these metrics receive is what you intend.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[
        tf.keras.metrics.Accuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tfa.metrics.F1Score(num_classes=nb_classes,
                            average='macro',
                            threshold=0.5),
    ],
)
I have built a neural network with tensorflow and it looks as follows:
# Builds a TF1 graph: two hidden layers of n_hidden units plus a single-unit
# output, trained with MSE via gradient descent on an exponentially decayed
# learning rate.
n_hidden = 32
steps = 10**5*4
decay_rate = 1e-4
initial_lr = 1e-3
tf.reset_default_graph()
# NOTE: `g` is created but not referenced again in this snippet; every op
# below is added to the default graph.
g = tf.Graph()
# Dropout rate falls back to 0.2 whenever it is not fed explicitly.
# NOTE(review): any run that does not feed 0 here (e.g. evaluation) still
# applies dropout, so reported metrics would be stochastic -- confirm callers.
dropout_rate = tf.placeholder_with_default(0.2, (), name='dropout')
# Current step used by the decay schedule; falls back to 1 when not fed.
curr_step = tf.placeholder_with_default(1, (), name='current_step')
learning_rate = tf.train.exponential_decay(initial_lr, global_step=curr_step, decay_steps=steps,
decay_rate=decay_rate, name='learning_rate')
# Inputs: float features with X.shape[1] columns and int64 labels.
X_tensor = tf.placeholder(tf.float32, shape=[None, X.shape[1]], name='X_input')
y_tensor = tf.placeholder(tf.int64, shape=[None], name='y_input')
# Layer 0. Dropout is applied to the weight matrix and the bias vector
# themselves, not to the layer's activations.
w = tf.Variable(tf.random_normal([X.shape[1], n_hidden]), name='w_0')
b = tf.Variable(tf.random.normal([n_hidden]), name='b_0')
product = tf.nn.leaky_relu(tf.matmul(X_tensor, tf.nn.dropout(w, rate=dropout_rate, name='w_0_dropout'),
name='matmul_0') + tf.nn.dropout(b, rate=dropout_rate, name='b_0_dropout'),
name='activation_0')
# Layer 1: same pattern as layer 0.
w_1 = tf.Variable(tf.random_normal([n_hidden, n_hidden]), name='w_1')
b_1 = tf.Variable(tf.random_normal([n_hidden]), name='b_1')
product_1 = tf.nn.leaky_relu(tf.matmul(product, tf.nn.dropout(w_1, rate=dropout_rate, name='w_1_dropout'),
name='matmul_1') + tf.nn.dropout(b_1, rate=dropout_rate, name='b_1_dropout'),
name='activation_1')
# Output layer: dropout on the weights only (b_2 is used as-is); the single
# output column is flattened to a 1-D tensor by the reshape to [-1].
w_2 = tf.Variable(tf.random_normal([n_hidden, 1]), name='w_2')
b_2 = tf.Variable(tf.random_normal([1]), name='b_2')
product_2 = tf.reshape(tf.nn.leaky_relu(tf.matmul(product_1, tf.nn.dropout(w_2, rate=dropout_rate,
name='w_2_dropout'),
name='matmul_2') + b_2, name='activation_2'), [-1],
name='reshape')
# Training objective: mean squared error between labels and the flat output.
cost = tf.losses.mean_squared_error(labels=y_tensor, predictions=product_2)
#correct_predictions = tf.equal(tf.argmax(product, axis=1), y_tensor)
#accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float64))
# Reporting metrics: absolute error, and accuracy computed by rounding the
# output to the nearest integer and comparing it with the label.
mae = tf.losses.absolute_difference(y_tensor, product_2)
correct_predictions = tf.equal(tf.cast(tf.round(product_2), tf.int64), y_tensor, name='correct')
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float64), name='accuracy')
# Note: `optimizer` is the training op returned by minimize(), not the optimizer object.
optimizer = tf.train.GradientDescentOptimizer(learning_rate, name='optimizer').minimize(cost)
Even when I decrease the learning rate to a negligible value (1e-100), the loss still fluctuates:
Step 2500, Minibatch Loss= 2.8308, Training Accuracy= 0.2525, Training MAE= 1.3107, lr= 0.00000000000000
Step 5000, Minibatch Loss= 2.7827, Training Accuracy= 0.2664, Training MAE= 1.2948, lr= 0.00000000000000
Step 7500, Minibatch Loss= 2.6718, Training Accuracy= 0.2481, Training MAE= 1.2784, lr= 0.00000000000000
Step 10000, Minibatch Loss= 2.6464, Training Accuracy= 0.2603, Training MAE= 1.2718, lr= 0.00000000000000
Step 12500, Minibatch Loss= 2.8204, Training Accuracy= 0.2614, Training MAE= 1.3014, lr= 0.00000000000000
Maybe I have mixed something up? All the data is scaled, so a learning rate of 1e-100 should have no effect on the weights, yet the metrics still change.
Will be grateful for any help!
Are you sure that the parameters fluctuate though? You don't display your execution code, however, it is quite possible, that all the metrics that are displayed are only an average over all the batches seen in the current epoch. That would mean that the first row is average over 2500 batches, the second row would be average over 5000 batches, etc.
This would explain the fluctuation. So try printing out your parameters after epochs, if they are indeed changing too, you can eliminate this hypothesis.
I have tried dropout implementation in Tensorflow.
I know that keep_prob should be declared as a placeholder and that its value should differ between training and testing. Still, I almost broke my brain trying to work out why the accuracy is so low with dropout. With keep_prob = 1, the train accuracy is 99% and the test accuracy is 85%; with keep_prob = 0.5, both train and test accuracy are 16%. Any ideas where to look? Thank you!
def forward_propagation(X, parameters, keep_prob):
    """
    Build the forward pass: (LINEAR -> RELU -> DROPOUT) x 2 -> LINEAR.

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  (the shapes are given in initialize_parameters)
    keep_prob -- value passed to tf.nn.dropout after each hidden ReLU

    Returns:
    Z3 -- the output of the last LINEAR unit (no softmax is applied here)
    """
    # Pull the six tensors out of the dictionary in layer order.
    W1, b1, W2, b2, W3, b3 = (
        parameters[key] for key in ("W1", "b1", "W2", "b2", "W3", "b3")
    )

    # Both hidden layers share one shape: W.A + b, then ReLU, then dropout.
    activation = X
    for weight, bias in ((W1, b1), (W2, b2)):
        pre_activation = tf.add(tf.matmul(weight, activation), bias)
        activation = tf.nn.dropout(tf.nn.relu(pre_activation), keep_prob)

    # The output layer is linear only.
    return tf.add(tf.matmul(W3, activation), b3)
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001, lambd = 0.03, train_keep_prob = 0.5,
          num_epochs = 800, minibatch_size = 32, print_cost = True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    Arguments:
    X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
    Y_train -- training labels, of shape (output size = 6, number of training examples = 1080)
    X_test -- test set, of shape (input size = 12288, number of test examples = 120)
    Y_test -- test labels, of shape (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    lambd -- L2 regularization hyperparameter
    train_keep_prob -- probability of keeping a neuron in hidden layer for dropout implementation
                       (used during training only; evaluation feeds keep_prob = 1.0)
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()                 # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                     # to keep consistent results
    seed = 3                                  # to keep consistent results
    (n_x, m) = X_train.shape                  # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]                    # n_y : output size
    costs = []                                # To keep track of the cost

    # Number of full minibatches in the train set. It does not change between
    # epochs, so compute it once; clamp to 1 so a training set smaller than
    # one minibatch does not cause a division by zero below.
    num_minibatches = max(int(m / minibatch_size), 1)

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)
    keep_prob = tf.placeholder(tf.float32)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters, keep_prob)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y, parameters, lambd)

    # Backpropagation.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):
            epoch_cost = 0.                   # Defines a cost related to an epoch
            seed = seed + 1                   # new seed each epoch -> different shuffling
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # Run one optimization step with dropout enabled (train_keep_prob).
                _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y, keep_prob: train_keep_prob})
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every 100 epochs, record it every 5 epochs
            if print_cost and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy; dropout is disabled by feeding keep_prob = 1.0
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train, keep_prob: 1.0}))
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test, keep_prob: 1.0}))

        return parameters
The algorithm is correct; it is just that keep_prob = 0.5 is too low.
Managed to get 87% accuracy on the test set with the following hyperparameters:
learning_rate = 0.00002, lambd = 0.03, train_keep_prob = 0.90, num_epochs = 1500, minibatch_size = 32,
In the first case your model was overfitting the data, hence the large difference between the train and test accuracy. Dropout is a regularization technique that reduces the variance of the model by reducing the effect of particular nodes, and hence prevents overfitting. But setting keep_prob = 0.5 (too low) weakens the model so much that it severely underfits the data, giving an accuracy as low as 16%. You should iterate by gradually decreasing the keep_prob value from 1 until you find a suitable value.
I want to save the model with the highest validation accuracy, so after each training step I run a validation step on one batch of validation data. The training set is reused across epochs, but if train_batch_size equals validation_batch_size, the validation set will also be cycled through repeatedly, because it is far smaller than the training set. How should I handle this? Can I reuse the validation set like this without any problems, or should I set the two batch sizes separately?
MAX_EPOCH = 10
for epoch in range(MAX_EPOCH):
    # ---- training phase ----
    train_step = int(80000 / TRAIN_BATCH_SIZE)
    train_loss = train_acc = 0
    first_train_step = epoch * train_step
    for step in range(first_train_step, first_train_step + train_step):
        # Pull the next training batch out of the input pipeline.
        x_train, y_train = sess.run([x_train_batch, y_train_batch])
        train_feed = {x: x_train, y_: y_train,
                      mode: learn.ModeKeys.TRAIN,
                      global_step: step}
        train_summary, _, err, ac = sess.run([merged, train_op, loss, acc],
                                             feed_dict=train_feed)
        train_loss += err
        train_acc += ac
        # Write a summary every 100 steps.
        if (step + 1) % 100 == 0:
            train_writer.add_summary(train_summary, step)
    print("Epoch %d,train loss= %.2f,train accuracy=%.2f%%" % (
        epoch, (train_loss / train_step), (train_acc / train_step * 100.0)))

    # ---- validation phase ----
    val_step = int(20000 / VAL_BATCH_SIZE)
    val_loss = val_acc = 0
    first_val_step = epoch * val_step
    for step in range(first_val_step, first_val_step + val_step):
        x_val, y_val = sess.run([x_val_batch, y_val_batch])
        val_feed = {x: x_val, y_: y_val, mode: learn.ModeKeys.EVAL,
                    global_step: step}
        # No train_op here: evaluation only reads loss and accuracy.
        val_summary, err, ac = sess.run([merged, loss, acc], feed_dict=val_feed)
        val_loss += err
        val_acc += ac
        if (step + 1) % 100 == 0:
            valid_writer.add_summary(val_summary, step)
    print(
        "Epoch %d,validation loss= %.2f,validation accuracy=%.2f%%" % (
            epoch, (val_loss / val_step), (val_acc / val_step * 100.0)))
It is possible to use a different batch size during evaluation.
That being said, you should use the same validation set every time you evaluate the model. Otherwise, the results can increase/decrease because the examples you evaluated on were inherently easier/more difficult compared to the previous evaluation.
The code is in Python 3.5.2 with TensorFlow. The neural network reports an accuracy of between 0.10 and 5.00, with the higher values tending to be the accuracy on the training data, which is larger by a factor of roughly 6. I cannot tell whether the neural network is legitimately doing worse than random guessing or whether the accuracy code I am using has a serious fault that I cannot see.
The neural network consists of 5 layers:
input
conv1 (with max pooling relu and dropout)
conv2 (with max pooling relu and dropout)
fully connected (with relu)
output
uses default Adam optimizer
I am very suspicious of my accuracy calculations, as I wrote them differently from what I have seen elsewhere because of RAM constraints. The accuracy code computes the accuracy on both the train and the test data.
# Build the accuracy ops once; both evaluation passes below reuse them.
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

# Test-set accuracy, evaluated batch by batch to limit memory use.
acc_total = 0
for _ in range(int(mnist.test.num_examples/batch_size)):
    test_x, test_y = mnist.test.next_batch(batch_size)
    acc = accuracy.eval(feed_dict={x: test_x, y: test_y})
    acc_total += acc
    print('Accuracy:',acc_total*batch_size/float(mnist.test.num_examples),end='\r')
print('Epoch', epoch, 'current test set accuracy : ',acc_total*batch_size/float(mnist.test.num_examples))

# Train-set accuracy. It must be normalized by the TRAIN set size; dividing
# by the test set size produced "accuracies" above 1.
acc_total = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
    train_x, train_y = mnist.train.next_batch(batch_size)
    acc = accuracy.eval(feed_dict={x: train_x, y: train_y})
    acc_total += acc
    print('Accuracy:',acc_total*batch_size/float(mnist.train.num_examples),end='\r')
print('Epoch', epoch, 'current train set accuracy : ',acc_total*batch_size/float(mnist.train.num_examples))
This is a sample of the outputs:
Epoch 0 completed out of 20 loss: 10333239.3396 83.29 ts 429
Epoch 0 current test set accuracy : 0.7072
Epoch 0 current train set accuracy : 3.8039
Epoch 1 completed out of 20 loss: 1831489.40747 39.24 ts 858
Epoch 1 current test set accuracy : 0.7765
Epoch 1 current train set accuracy : 4.2239
Epoch 2 completed out of 20 loss: 1010191.40466 25.89 ts 1287
Epoch 2 current test set accuracy : 0.8069
Epoch 2 current train set accuracy : 4.3898
Epoch 3 completed out of 20 loss: 631960.809082 0.267 ts 1716
Epoch 3 current test set accuracy : 0.8277
Epoch 3 current train set accuracy : 4.4955
Epoch 4 completed out of 20 loss: 439149.724823 2.001 ts 2145
Epoch 4 current test set accuracy : 0.8374
Epoch 4 current train set accuracy : 4.5674
The full code is as follows (sorry about the length i added a lot of comments for my own use ):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
#Imported data set (labels are one-hot encoded)
mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)
#amount of output classes
n_classes = 10
#amount of examples processed at once
#memory impact of ~500MB for 128 with more on eval runs
batch_size = 128
#Times to cycle through the entire input data set
epoch_amm =20
#Input and output placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32)
#Dropout is 1-keeprate; fc - fully connected layer dropout; conv - conv layer dropout
keep_rate_fc=.5
keep_rate_conv=.75
keep_prob=tf.placeholder(tf.float32)
#Regularization parameters
Regularization_active= False #True and False MUST be capitalized
Lambda= 1.0 #'weight' of the weights on the loss function
# counter for total steps taken by trainer
training_steps = 1
#Learning Rate For Network
base_Rate = .03
decay_steps = 64
decay_rate = .96
Staircase = True
# Pass the Staircase variable itself; the string 'Staircase' is always truthy,
# so setting Staircase = False above previously had no effect.
# NOTE(review): training_steps is a plain Python int here, so the schedule is
# evaluated at a fixed step -- confirm whether a step tensor was intended.
Learning_Rate = tf.train.exponential_decay(base_Rate, training_steps, decay_steps, decay_rate, staircase=Staircase, name='Exp_decay' )
#2-D convolution of x with filter W, stride 1 in every dimension, SAME padding
def conv2d(x,W):
    unit_strides = [1,1,1,1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
#Max pooling over 2x2 windows with stride 2, SAME padding
def maxpool2d(x):
    window = [1,2,2,1]
    step = [1,2,2,1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def relu(x):
    """Apply ReLU to x; the op is named 'relu' in the graph."""
    activated = tf.nn.relu(x, 'relu')
    return activated
def add(x, b):
    """Return the element-wise sum of x and b (thin wrapper over tf.add)."""
    total = tf.add(x, b)
    return total
#'Main' method, contains the Neural Network
def convolutional_neural_network(x, conv_keep_prob=None, fc_keep_prob=None):
    """Build the CNN graph and return the output-layer logits.

    Architecture: conv(5x5, 32) -> relu -> maxpool -> dropout ->
    conv(5x5, 64) -> relu -> maxpool -> dropout -> fully connected (1024)
    -> relu -> dropout -> linear output (n_classes).

    Arguments:
    x -- input tensor; it is reshaped to [-1, 28, 28, 1]
    conv_keep_prob -- keep probability for dropout after each conv layer.
                      None (default) uses the module-level keep_rate_conv,
                      which is the original behavior.
    fc_keep_prob -- keep probability for dropout after the fully connected
                    layer. None (default) uses the module-level keep_rate_fc.

    Passing a placeholder (or 1.0) for the keep probabilities lets a caller
    switch dropout off for evaluation; with the defaults, dropout is always
    active whenever the returned tensor is evaluated.

    Returns:
    output -- logits tensor produced by the final matmul + bias
    """
    if conv_keep_prob is None:
        conv_keep_prob = keep_rate_conv
    if fc_keep_prob is None:
        fc_keep_prob = keep_rate_fc

    weights = {'W_conv1':tf.Variable(tf.random_normal([5,5,1,32])),
               'W_conv2':tf.Variable(tf.random_normal([5,5,32,64])),
               'W_fc':tf.Variable(tf.random_normal([7*7*64,1024])),
               'W_out':tf.Variable(tf.random_normal([1024,n_classes]))}
    biases = {'B_conv1':tf.Variable(tf.random_normal([32])),
              'B_conv2':tf.Variable(tf.random_normal([64])),
              'B_fc':tf.Variable(tf.random_normal([1024])),
              'B_out':tf.Variable(tf.random_normal([n_classes]))}

    # Input layer: reshape the input into 28x28 single-channel images
    x = tf.reshape(x, shape=[-1,28,28,1])

    #first layer: convolution + bias, relu, 2x2 max pooling, dropout
    conv1 = conv2d(x,weights['W_conv1'])
    conv1 = add(conv1,biases['B_conv1'])
    conv1 = relu(conv1)
    conv1 = maxpool2d(conv1)
    conv1 = tf.nn.dropout(conv1,conv_keep_prob)

    #second layer does same as first layer
    conv2 = conv2d(conv1,weights['W_conv2'])
    conv2 = add(conv2,biases['B_conv2'])
    conv2 = relu(conv2)
    conv2 = maxpool2d(conv2)
    conv2 = tf.nn.dropout(conv2,conv_keep_prob)

    #3rd layer fully connected; two 2x2 poolings reduce 28x28 to 7x7
    fc = tf.reshape(conv2,[-1,7*7*64])
    fc = tf.matmul(fc,weights['W_fc'])
    fc = add(fc,biases['B_fc'])
    fc = relu(fc)
    fc = tf.nn.dropout(fc,fc_keep_prob)

    #4th and final layer: linear projection to class logits
    output = tf.matmul(fc,weights['W_out'])
    output = add(output,biases['B_out'])
    return output
def _evaluate_accuracy(accuracy, inputs, dataset):
    """Accumulate `accuracy` over `dataset` in batches of `batch_size`.

    Arguments:
    accuracy -- scalar accuracy tensor to evaluate (needs a default session)
    inputs -- input placeholder to feed the batch features into
    dataset -- object exposing num_examples and next_batch(batch_size)

    Returns:
    The summed per-batch accuracy, scaled by batch_size / dataset.num_examples.
    Only full batches are evaluated, so the result is normalized by the size
    of the SAME data set that was iterated.
    """
    n_examples = float(dataset.num_examples)
    acc_total = 0
    for _ in range(int(dataset.num_examples/batch_size)):
        batch_x, batch_y = dataset.next_batch(batch_size)
        acc_total += accuracy.eval(feed_dict={inputs: batch_x, y: batch_y})
        # Running value, overwritten in place on the console.
        print('Accuracy:',acc_total*batch_size/n_examples,end='\r')
    return acc_total*batch_size/n_examples


#Trains The neural Network
def train_neural_network(x):
    """Train the CNN on MNIST and print test/train accuracy after every epoch.

    Arguments:
    x -- input placeholder that is fed with image batches
    """
    training_steps = 0
    #Initiate The Network
    prediction = convolutional_neural_network(x)
    #Define the cost: mean softmax cross-entropy over the batch.
    #Named arguments are used because the positional (logits, labels) form is
    #rejected by TensorFlow 1.x.
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
    #TODO: L2 regularization (Regularization_active / Lambda) is not applied here.
    #NOTE: Adam runs with its default learning rate; Learning_Rate is not passed in.
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    #Build the accuracy ops once, instead of adding new ops to the graph every epoch
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    #Get Epoch Amount
    hm_epochs = epoch_amm
    #Starts C++ Training session
    print('Session Started')
    with tf.Session() as sess:
        #Initiate all Variables
        sess.run(tf.global_variables_initializer())
        #Begin Logs
        summary_writer = tf.summary.FileWriter('/tmp/logs',sess.graph)
        #Start Training
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples/batch_size)):
                training_steps = (training_steps+1)
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                # The optimizer op returns nothing useful; keep only the batch cost.
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
                print('Epoch', epoch, 'current epoch loss', epoch_loss, 'batch loss', c,'ts',training_steps,' ', end='\r')
            #Log the loss per epoch
            print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss,' ')
            # NOTE(review): the network is built with fixed dropout keep rates,
            # so these evaluations run with dropout still active -- confirm.
            test_acc = _evaluate_accuracy(accuracy, x, mnist.test)
            print('Epoch', epoch, 'current test set accuracy : ',test_acc)
            train_acc = _evaluate_accuracy(accuracy, x, mnist.train)
            print('Epoch', epoch, 'current train set accuracy : ',train_acc)
        # Flush and close the log writer; the `with` block closes the session.
        summary_writer.close()
        print('Complete')
#Run the Neural Network
train_neural_network(x)
The CNN had low results because of 4 reasons:
Improper (Lack of) feeding of dropout
- The keep rate was not being fed into accuracy.eval(feed_dict={x: test_x, y: test_y}), causing the network to underperform in its accuracy evaluations.
Poor Initialization of weights
ReLU neurons work significantly better with weights initialized close to zero than with weights drawn from a standard normal distribution.
Far too high a learning rate
A learning rate of .03, even with decay, was far too high and stopped the network from training effectively.
errors in accuracy function
The accuracy function for the training data was taking the size of the data set from mnist.test.num_examples instead of the proper mnist.train.num_examples, which caused nonsensical accuracy values in excess of 100%.