Am I using pre-trained variables on testing set? - tensorflow

I'm quite new to deep learning and this is almost my first piece of code.
My question is: I am getting almost 100 percent training accuracy, but my test accuracy is only about 0.12.
I tried regularization because I thought overfitting was the problem. However, that may not be it, because after I added some dropout and regularization the test accuracy dropped even further.
Am I not using the variables that I trained when I compute my test accuracy?
Thanks a lot.
x = tf.placeholder(tf.float32, shape = [None, 128, 18, 1])
y = tf.placeholder(tf.float32, shape = [None, 6])
is_train = tf.placeholder(tf.bool, name = 'is_train')
keep_prob = tf.placeholder(tf.float32)

y_pred, logits = CNN_model_2(x, 6)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = logits))
regularization_losses = tf.losses.get_regularization_losses()
loss = tf.add_n([loss] + regularization_losses)

optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

saver = tf.train.Saver()
save_dir = 'D:\AMD\Only_oneoverten_sec\Log'

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(1000):
        batch = next_batch(50, training_features, training_labels)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict = {x: batch[0], y: batch[1], keep_prob: 1.0})
            loss_print = loss.eval(feed_dict = {x: batch[0], y: batch[1], keep_prob: 1.0})
            print("Epoch: %d, Training Set Accuracy: %f, Loss: %f" % (i, train_accuracy, loss_print))
        sess.run(optimizer, feed_dict = {x: batch[0], y: batch[1], keep_prob: 0.8})

    saver.save(sess, os.path.join(save_dir, 'CNN_model'), global_step = 1000)

    test_accuracy = 0
    for i in range(20):
        test_batch = next_batch(20, testing_features, testing_labels)
        test_accuracy = test_accuracy + accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
        test_accuracy = test_accuracy / 20

    print("Test accuracy: %f" % test_accuracy)

Yes, you are using the trained variables, but the way you compute the test accuracy is wrong in these lines:
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    test_accuracy = test_accuracy + accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy = test_accuracy / 20
Change it like this:
test_accuracy = 0
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    acc = accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy += acc / 20
or:
test_accuracy = 0
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    test_accuracy = test_accuracy + accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
test_accuracy = test_accuracy / 20
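As a side note, if the whole test set fits in memory you can skip the batching and averaging entirely. A minimal sketch, assuming testing_features and testing_labels are the same NumPy arrays that next_batch draws from:

# Evaluate the accuracy op once on the full test set (no averaging needed).
test_accuracy = accuracy.eval(feed_dict = {x: testing_features, y: testing_labels, keep_prob: 1.0})
print("Test accuracy: %f" % test_accuracy)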

Related

GradientDescentOptimizer is giving less accuracy (~0.10) compared to AdamOptimizer(0.95) in convolutional neural net in Tensorflow

I am building a convolutional neural network for classifying MNIST data. I'm using 2 conv layers and 2 fully connected layers.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

def _net_params():
    weights = {
        'conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
        'conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
        'fc1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
        'fc2': tf.Variable(tf.random_normal([1024, 10])),
    }
    biases = {
        'conv1': tf.Variable(tf.random_normal([32]), tf.float32),
        'conv2': tf.Variable(tf.random_normal([64]), tf.float32),
        'fc1': tf.Variable(tf.random_normal([1024]), tf.float32),
        'fc2': tf.Variable(tf.random_normal([10]), tf.float32),
    }
    return weights, biases

def _fc_layer(inputs, weights, biases):
    return tf.add(tf.matmul(inputs, weights), biases)

def _conv_layer(inputs, weights, biases, stride=1, padding='SAME'):
    layer = tf.nn.conv2d(input=inputs, filter=weights,
                         strides=[1, stride, stride, 1], padding=padding)
    layer = tf.nn.bias_add(layer, biases)
    return tf.nn.relu(layer)

def pool_layer(inputs):
    pool = tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding="SAME")
    return pool

def conv_net(x):
    weights, biases = _net_params()
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    # Conv layers
    conv1 = _conv_layer(x, weights['conv1'], biases['conv1'])
    pool1 = pool_layer(conv1)
    conv2 = _conv_layer(pool1, weights['conv2'], biases['conv2'])
    pool2 = pool_layer(conv2)
    flattened = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = _fc_layer(flattened, weights['fc1'], biases['fc1'])
    fc1 = tf.nn.relu(fc1)
    fc2 = _fc_layer(fc1, weights['fc2'], biases['fc2'])
    return fc2

def _training():
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    learning_rate_ = tf.placeholder(tf.float32)
    pred = conv_net(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate_).minimize(cost)
    # optimizer = tf.train.GradientDescentOptimizer(
    #     learning_rate=learning_rate_).minimize(cost)
    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return x, y_, learning_rate_, optimizer, cost, accuracy

def main():
    mnist = input_data.read_data_sets('tmp/data', one_hot=True)
    n_epochs = 3
    batch_size = 200
    learning_rate = 0.005
    x, y_, learning_rate_, optimizer, cost, accuracy = _training()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        current_epoch = 0
        while current_epoch < n_epochs:
            current_epoch += 1
            print('epoch %s' % (current_epoch,))
            current_batch = 1
            while current_batch * batch_size <= len(mnist.train.images):
                current_batch += 1
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                sess.run(fetches=optimizer, feed_dict={
                    x: batch_x, y_: batch_y, learning_rate_: learning_rate, })
                if current_batch % 75 == 0:
                    loss, acc = sess.run([cost, accuracy], feed_dict={
                        x: batch_x, y_: batch_y, learning_rate_: 0., })
                    print('  batch %s: batch_loss=%s, training_accuracy=%s'
                          % (current_batch, loss, acc,))
        print('Training complete !')
        print('Final accuracy is %s' % sess.run(accuracy, feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, learning_rate_: 0.}))

if __name__ == '__main__':
    main()
(There might be some indentation errors from pasting this code here on Stack Overflow.)
When I use AdamOptimizer, I get accuracy >95% (screenshot: Accuracy for AdamOptimizer).
But when I use GradientDescentOptimizer, I get an accuracy of about 10% (screenshot: accuracy for GradientDescentOptimizer).
Do you know why I'm getting this lower accuracy, and how to fix it if I want to use GradientDescentOptimizer?
Thanks

Tensorflow FailedPreconditionError: Attempting to use uninitialized value Variable

I followed the instructions for 'Build a Multilayer Convolutional Network' on the official website. My code is exactly the same as the code they provide there. [https://www.tensorflow.org/get_started/mnist/pros]
I also remember calling the global variables initializer.
However, the error arises.
But if I change tf.Session() to tf.InteractiveSession(), it works.
What's wrong here? Thanks in advance.
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(100):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1})
            print('step %d, training accuracy %g' % (i, train_accuracy))
        train_step.run(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 0.5})

print('test accuracy %g' % accuracy.eval(feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
with tf.Session() as sess:
    ...
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
When you use tf.Session, you should put the print/eval call inside the with block, so that sess is set as the default session when eval runs.
An InteractiveSession installs itself as the default session, so you can execute eval and run against that default session anywhere.
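For completeness, a minimal sketch of the two options, reusing the accuracy, x, y_, keep_prob and mnist names from the question; the second variant is just an assumption about how you might structure the code if the eval really has to live outside the with block:

# Option 1: evaluate while the session is still the default (inside `with`).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ... training loop ...
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# Option 2: keep the session object around and pass it explicitly to eval.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# ... training loop using sess.run(...) ...
print('test accuracy %g' % accuracy.eval(
    feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0},
    session=sess))
sess.close()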

How to get ROC_Curve and Confusion Matrix in TensorFlow

I am trying to get an ROC curve and a confusion matrix in TensorFlow. I used the sklearn.metrics functions and I am getting an error. My code is below:
from sklearn.metrics import roc_curve, auc

n_inputs = x_train.shape[1]
n_hidden1 = 500
n_hidden2 = 200
n_outputs = 2
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

hidden1 = tf.layers.dense(X, n_hidden1, activation=None)
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=None)
logits = tf.layers.dense(hidden2, n_outputs)

loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
training_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
n_epochs = 20

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        sess.run(training_op, feed_dict={X: x_train, y: y_train})
        acc_train = accuracy.eval(feed_dict={X: x_train, y: y_train})
        acc_test = accuracy.eval(feed_dict={X: x_test, y: y_test})
        print("Epoch:", epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

    y_score = np.array(logits)
    roc_curve(y_test, y_score)
The error I got is below:
TypeError: Singleton array array(<tf.Tensor 'dense_26/BiasAdd:0' shape=(?, 2) dtype=float32>, dtype=object) cannot be considered a valid collection.
Any help will be appreciated. Thanks!
When you call
sess.run(training_op, feed_dict={X: x_train, y: y_train})
you need to ask the session to also return the value of the logits tensor. Change it to this:
training_op_result, logits_result = sess.run([training_op, logits], feed_dict={X: x_train, y: y_train})
y_score = np.array(logits_result)
roc_curve(y_test, y_score)
A tensor is a graph object. You access value/results of a tensor or computation through sess.run.
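Building on that, here is a minimal sketch (an assumption about the intended usage, run inside the with block so sess is still available) of how the sklearn calls could then be fed; it evaluates the logits on the test set rather than on a training batch:

import numpy as np
from sklearn.metrics import roc_curve, auc, confusion_matrix

# Evaluate the logits on the held-out data; shape is (n_samples, 2).
test_logits = sess.run(logits, feed_dict={X: x_test})
# Softmax to turn logits into class probabilities.
probs = np.exp(test_logits) / np.exp(test_logits).sum(axis=1, keepdims=True)

# roc_curve expects one score per sample for the positive class (column 1),
# while confusion_matrix expects hard class predictions.
fpr, tpr, thresholds = roc_curve(y_test, probs[:, 1])
roc_auc = auc(fpr, tpr)
cm = confusion_matrix(y_test, probs.argmax(axis=1))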

Multilabel Classification with Tensorflow

I have the code below for a multilabel classification:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
                                       allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

#.........................................................................
learning_rate = 0.001
training_epochs = 5000
display_step = 50
num_input = x_train.shape[1]
num_classes = y_train.shape[1]

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.relu(tf.matmul(X, w_h))
    h = tf.nn.dropout(h, p_keep_hidden)
    h2 = tf.nn.relu(tf.matmul(h, w_h2))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    h3 = tf.nn.relu(tf.matmul(h2, w_h3))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    return tf.nn.sigmoid(tf.matmul(h3, w_o))

x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])

w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])

p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")

pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)

#cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))
cost = -tf.reduce_sum( ( (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) ) , name='xentropy' )

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
#optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

#--------------------------------------------------------------------------------
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.local_variables_initializer())
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
    print("Optimization Complete!")

    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
And the output is below:
Epoch: 000 Training Accuracy: 0.31500 cost= 62297.6406250000
Epoch: 050 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 100 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 150 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 200 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 250 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 300 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 350 Training Accuracy: 0.30722 cost= 433502.8125000000
...
Epoch: 5000 Training Accuracy: 0.30722 cost= 433502.8125000000
As shown above, the training accuracy stays the same for almost the entire training run. I varied the number of hidden layers and the learning rate (0.001, 0.01, 0.1), and the trend was still the same.
I'd appreciate some help on what I may be doing wrong.
The main problem with your code is that you are not using mini-batch gradient descent; instead you use the whole training set for every gradient update. Additionally, 5000 epochs is too many I think, and I guess 50-100 will be enough (you can verify by experiment). Also, of the following two lines, the second is redundant: you are running the graph twice in each iteration when you only need to run it once:
sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
Correct form:
_, avg_cost= sess.run([optimizer,cost], feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
The following is the modified code (the lines I have added are marked with an # ADDED # comment):
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
                                       allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

batch_size = 100 # ADDED #
num_batches = x_train.shape[0]/batch_size # ADDED #

learning_rate = 0.001
training_epochs = 5000
display_step = 1
num_input = x_train.shape[1]
num_classes = y_train.shape[1]

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.relu(tf.matmul(X, w_h))
    h = tf.nn.dropout(h, p_keep_hidden)
    h2 = tf.nn.relu(tf.matmul(h, w_h2))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    h3 = tf.nn.relu(tf.matmul(h2, w_h3))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    return tf.nn.sigmoid(tf.matmul(h3, w_o))

x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])

w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])

p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")

pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)

cost = -tf.reduce_sum( ( (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) ) , name='xentropy' )

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)

correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.local_variables_initializer())
    for epoch in range(training_epochs):
        for i in xrange(num_batches): # ADDED #
            indices = xrange(i*batch_size, (i+1)*batch_size) # ADDED #
            _, avg_cost = sess.run([optimizer, cost], feed_dict = {x : x_train[indices], y : y_train[indices], p_keep_input: 1.0, p_keep_hidden: 1.0}) # ADDED #
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
    print("Optimization Complete!")

    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))

Can't seem to get Tensorflow's tf.metrics.auc working

Tensorflow has a function to calculate AUC: tf.metrics.auc(). Here is a section of my code trying to compute the AUC:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 0.8, p_keep_hidden: 0.5})
        avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 0.8, p_keep_hidden: 0.5})
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.5f}".format(avg_cost))
    print("Optimization Done!")

    roc_score = tf.metrics.auc(y, pred)
    roc_score = tf.convert_to_tensor(roc_score)
    print(roc_score.eval({x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
A section of the error I get is below; the entire error is quite lengthy.
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value auc_4/false_positives
[[Node: auc_4/false_positives/read = Identity[T=DT_FLOAT, _class=["loc:#auc_4/false_positives"], _device="/job:localhost/replica:0/task:0/cpu:0"](auc_4/false_positives)]]
I'd appreciate any pointers on how to resolve this. Thanks
Might be too late now, but if you haven't found the solution, try this change:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
_,roc_score = tf.metrics.auc(y, pred)
print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
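For context, tf.metrics.auc returns a (value, update_op) pair whose counters live in local variables, which is why the local initializer matters. A minimal sketch of that pattern, reusing the placeholders and feeds from the question:

auc_value, auc_update_op = tf.metrics.auc(y, pred)

sess.run(tf.local_variables_initializer())   # reset the metric's counters
sess.run(auc_update_op, feed_dict={x : x_test, y : y_test,
                                   p_keep_input: 1.0, p_keep_hidden: 1.0})
print(sess.run(auc_value))                   # read the accumulated AUC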