I'm quite new to deep learning, and this is almost my first piece of code.
My question is: I am getting almost 100 percent training accuracy, but my testing accuracy is only about 0.12.
I tried regularization because I thought overfitting was the problem, but that may not be it: after I added some dropout and regularization, the testing accuracy actually dropped.
Am I somehow not using the variables that I trained when I evaluate the testing accuracy?
Thanks a lot.
x = tf.placeholder(tf.float32, shape=[None, 128, 18, 1])
y = tf.placeholder(tf.float32, shape=[None, 6])
is_train = tf.placeholder(tf.bool, name='is_train')
keep_prob = tf.placeholder(tf.float32)

y_pred, logits = CNN_model_2(x, 6)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
regularization_losses = tf.losses.get_regularization_losses()
loss = tf.add_n([loss] + regularization_losses)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

saver = tf.train.Saver()
save_dir = r'D:\AMD\Only_oneoverten_sec\Log'  # raw string so the backslashes are kept literally

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training loop
    for i in range(1000):
        batch = next_batch(50, training_features, training_labels)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
            loss_print = loss.eval(feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
            print("Epoch: %d, Training Set Accuracy: %f, Loss: %f" % (i, train_accuracy, loss_print))
        sess.run(optimizer, feed_dict={x: batch[0], y: batch[1], keep_prob: 0.8})

    saver.save(sess, os.path.join(save_dir, 'CNN_model'), global_step=1000)

    # Evaluation on the test set
    test_accuracy = 0
    for i in range(20):
        test_batch = next_batch(20, testing_features, testing_labels)
        test_accuracy = test_accuracy + accuracy.eval(feed_dict={x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
        test_accuracy = test_accuracy / 20

    print("Test accuracy: %f" % test_accuracy)
Yes, you are using the trained variables, but your test-accuracy computation itself is wrong in these lines:
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    test_accuracy = test_accuracy + accuracy.eval(feed_dict={x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy = test_accuracy / 20
Change it like this:
test_accuracy = 0
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    acc = accuracy.eval(feed_dict={x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy += acc / 20
or:
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    test_accuracy = test_accuracy + accuracy.eval(feed_dict={x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
test_accuracy = test_accuracy / 20
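To the asker's question about whether the trained variables are used: yes, because the evaluation runs inside the same tf.Session that did the training. If the test were run in a separate script or session, the checkpoint written by saver.save would have to be restored first. A minimal sketch of that (my own addition, assuming the same graph-construction code has already been run):

with tf.Session() as sess:
    # Restore the weights written by saver.save(...) instead of re-initializing them.
    ckpt = tf.train.latest_checkpoint(save_dir)
    saver.restore(sess, ckpt)

    test_accuracy = 0
    for i in range(20):
        test_batch = next_batch(20, testing_features, testing_labels)
        test_accuracy += accuracy.eval(feed_dict={x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy /= 20
    print("Test accuracy: %f" % test_accuracy)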
I am building a convolutional neural network for classifying MNIST data. I'm using 2 convolutional layers and 2 fully connected layers.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


def _net_params():
    weights = {
        'conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
        'conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
        'fc1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
        'fc2': tf.Variable(tf.random_normal([1024, 10])),
    }
    biases = {
        'conv1': tf.Variable(tf.random_normal([32]), dtype=tf.float32),
        'conv2': tf.Variable(tf.random_normal([64]), dtype=tf.float32),
        'fc1': tf.Variable(tf.random_normal([1024]), dtype=tf.float32),
        'fc2': tf.Variable(tf.random_normal([10]), dtype=tf.float32),
    }
    return weights, biases


def _fc_layer(inputs, weights, biases):
    return tf.add(tf.matmul(inputs, weights), biases)


def _conv_layer(inputs, weights, biases, stride=1, padding='SAME'):
    layer = tf.nn.conv2d(input=inputs, filter=weights,
                         strides=[1, stride, stride, 1], padding=padding)
    layer = tf.nn.bias_add(layer, biases)
    return tf.nn.relu(layer)


def pool_layer(inputs):
    pool = tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding="SAME")
    return pool


def conv_net(x):
    weights, biases = _net_params()
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Conv layers
    conv1 = _conv_layer(x, weights['conv1'], biases['conv1'])
    pool1 = pool_layer(conv1)
    conv2 = _conv_layer(pool1, weights['conv2'], biases['conv2'])
    pool2 = pool_layer(conv2)

    # Fully connected layers
    flattened = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = _fc_layer(flattened, weights['fc1'], biases['fc1'])
    fc1 = tf.nn.relu(fc1)
    fc2 = _fc_layer(fc1, weights['fc2'], biases['fc2'])
    return fc2


def _training():
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    learning_rate_ = tf.placeholder(tf.float32)

    pred = conv_net(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate_).minimize(cost)
    # optimizer = tf.train.GradientDescentOptimizer(
    #     learning_rate=learning_rate_).minimize(cost)

    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return x, y_, learning_rate_, optimizer, cost, accuracy


def main():
    mnist = input_data.read_data_sets('tmp/data', one_hot=True)

    n_epochs = 3
    batch_size = 200
    learning_rate = 0.005

    x, y_, learning_rate_, optimizer, cost, accuracy = _training()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        current_epoch = 0
        while current_epoch < n_epochs:
            current_epoch += 1
            print('epoch %s' % (current_epoch,))

            current_batch = 1
            while current_batch * batch_size <= len(mnist.train.images):
                current_batch += 1
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                sess.run(fetches=optimizer,
                         feed_dict={x: batch_x, y_: batch_y,
                                    learning_rate_: learning_rate})

                if current_batch % 75 == 0:
                    loss, acc = sess.run([cost, accuracy],
                                         feed_dict={x: batch_x, y_: batch_y,
                                                    learning_rate_: 0.})
                    print('  batch %s: batch_loss=%s, training_accuracy=%s'
                          % (current_batch, loss, acc))

        print('Training complete !')
        print('Final accuracy is %s' % sess.run(
            accuracy, feed_dict={x: mnist.test.images,
                                 y_: mnist.test.labels,
                                 learning_rate_: 0.}))


if __name__ == '__main__':
    main()
(There might be some indentation errors from pasting the code here on Stack Overflow.)
When I use AdamOptimizer, I get accuracy above 95%.
(screenshot: Accuracy for AdamOptimizer)
But when I use GradientDescentOptimizer, I get an accuracy of only about 10%.
(screenshot: accuracy for GradientDescentOptimizer)
Do you know why I am getting this lower accuracy, and how to fix it if I want to use GradientDescentOptimizer?
Thanks
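For what it's worth, here is a minimal sketch (my own refactor, not part of the original post) that makes the optimizer a parameter of _training(), so the Adam/SGD comparison can be rerun without editing the commented-out line; everything else is assumed unchanged:

def _training(optimizer_cls=tf.train.AdamOptimizer):
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    learning_rate_ = tf.placeholder(tf.float32)

    pred = conv_net(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
    # Pass tf.train.AdamOptimizer or tf.train.GradientDescentOptimizer here.
    optimizer = optimizer_cls(learning_rate=learning_rate_).minimize(cost)

    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return x, y_, learning_rate_, optimizer, cost, accuracy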
My task is to produce saliency maps with a VGG-based network.
But the MSE loss doesn't decrease the way I expect, and I can't find out why.
P.S. The training dataset is SALICON.
Here's the output:
training epoch 1, loss value is 0.041423566639423
training epoch 2, loss value is 0.041423123329878
training epoch 3, loss value is 0.041430559009314
training epoch 4, loss value is 0.041424177587032
...
training epoch 20, loss value is 0.041416928172112
I have tried changing the optimizer, the learning rate, and the loss function, but nothing helps.
Here's my code:
def shuffle(photo, grdtr, shuffle=True):
    idx = np.arange(0, len(photo))
    if shuffle:
        np.random.shuffle(idx)
    photo_shuffle = [photo[i] for i in idx]
    grdtr_shuffle = [grdtr[i] for i in idx]
    return np.asarray(photo_shuffle), np.asarray(grdtr_shuffle)

if __name__ == '__main__':
    # create the model
    x = tf.placeholder(tf.float32, [None, 48, 64, 3])
    y_ = tf.placeholder(tf.float32, [None, 48 * 64, 1])
    h = tf.placeholder(tf.float32, [None, 48, 64, 1])
    y = deepnn(x)

    # define loss and optimizer
    # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))), labels=y_))
    y_ = tf.nn.softmax(y_, dim=1)
    cross_entropy = tf.reduce_mean(tf.pow(y - y_, 2))
    # cross_entropy = tf.reduce_mean(y_ * tf.log(y_ / y))  # KL divergence
    tf.summary.scalar('loss', cross_entropy)
    train_step = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999).minimize(cross_entropy)

    # do the training
    with tf.Session() as sess:
        ...
        # load the training data
        photos = np.load('./data/npy/SALICON100/photos_queue.npy')
        grdtrs = np.load('./data/npy/SALICON100/grdtrs_queue.npy')
        photos = photos / 255.0
        grdtrs = grdtrs / 255.0

        EPOCH = 20
        BATCH_SIZE = 20
        TRAINING_SET_SIZE = 20

        for j in range(EPOCH):
            # photos, grdtrs = shuffle(photos, grdtrs, shuffle=False)
            grdtrs = np.resize(grdtrs, [TRAINING_SET_SIZE, 48, 64, 20])
            grdtrs = np.reshape(grdtrs, [TRAINING_SET_SIZE, 48 * 64, 20])
            _, loss_value, pred_y = sess.run([train_step, cross_entropy, y],
                                             feed_dict={x: photos[:20], y_: grdtrs[:20]})
            if (j + 1) % 1 == 0:
                print('training epoch %d, loss value is %.15f' % (j + 1, loss_value))

        np.save('./data/20_photos_test/net_output.npy', pred_y)
        np.save('./data/20_photos_test/net_grdtrs.npy', grdtrs[:20])

        # stop the queue threads and properly close the session
        ...
And here are the relevant tensor definitions used in the session again:
x = tf.placeholder(tf.float32, [None, 48, 64, 3])
y_ = tf.placeholder(tf.float32, [None, 48 * 64, 1])
y = deepnn(x)
cross_entropy = tf.reduce_sum(tf.pow(y-y_sm,2))
In the code you posted you never actually run your train step. You need to call something along the lines of sess.run(train_step, feed_dict) to actually train your network. If you do not train your network, the loss obviously will not decrease.
Also, are you sure that you want to apply a softmax to your labels?
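A minimal sketch of what this answer describes, reusing the question's names (train_step, cross_entropy, x, y_, photos, grdtrs); the slice to the first BATCH_SIZE examples mirrors the question's loop and is my own assumption:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(EPOCH):
        # Actually execute the training op; fetching the loss in the same call
        # returns the value used for this update without a second graph run.
        _, loss_value = sess.run([train_step, cross_entropy],
                                 feed_dict={x: photos[:BATCH_SIZE], y_: grdtrs[:BATCH_SIZE]})
        print('training epoch %d, loss value is %.15f' % (epoch + 1, loss_value))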
I'm trying the getting-started example of TensorFlow from this page. I'd like to print something related to cross_entropy but get nothing.
Here is the code; it can also be found here.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
cross_entropy = tf.Print(cross_entropy, [cross_entropy], "###")
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
I cannot figure out why tf.Print, which is bound to cross_entropy, prints nothing in each loop iteration.
I think I've already chained tf.Print -> cross_entropy -> train_step, and I do run this train_step. What am I missing?
You're right, tf.Print is (quoting the documentation):
an identity op with the side effect of printing data when evaluating.
Therefore, rightly, you expect to see the value of cross_entropy every time something flows across the cross_entropy node.
The problem is that you're minimizing the real cross entropy and not the identity node: in practice, the cross_entropy Python variable holds an identity node that "points" to another node, and it is that underlying node that actually gets evaluated.
To solve this, you can force the order of evaluation of the nodes in the graph.
You can constrain the minimization step to run only after the value has been logged. To do this, use tf.control_dependencies like so:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

with tf.control_dependencies([tf.Print(cross_entropy, [cross_entropy], "###")]):
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                    y_: mnist.test.labels}))
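A simpler alternative (not part of the original answer) is to skip tf.Print entirely and fetch the loss tensor in the same sess.run call, printing it from Python; a minimal sketch reusing the names above:

for step in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Fetching cross_entropy alongside train_step evaluates both in one run,
    # so the value printed here is the loss used for this update.
    _, loss_value = sess.run([train_step, cross_entropy],
                             feed_dict={x: batch_xs, y_: batch_ys})
    if step % 100 == 0:
        print('step %d, cross entropy %f' % (step, loss_value))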
I have the code below for a multilabel classification problem:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
                                       allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

#.........................................................................

learning_rate = 0.001
training_epochs = 5000
display_step = 50
num_input = x_train.shape[1]
num_classes = y_train.shape[1]

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.relu(tf.matmul(X, w_h))
    h = tf.nn.dropout(h, p_keep_hidden)
    h2 = tf.nn.relu(tf.matmul(h, w_h2))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    h3 = tf.nn.relu(tf.matmul(h2, w_h3))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    return tf.nn.sigmoid(tf.matmul(h3, w_o))

x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])

w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])

p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")

pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)

#cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))
cost = -tf.reduce_sum(((y * tf.log(pred + 1e-9)) + ((1 - y) * tf.log(1 - pred + 1e-9))), name='xentropy')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

#--------------------------------------------------------------------------------
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.local_variables_initializer())
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict={x: x_train, y: y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        avg_cost = sess.run(cost, feed_dict={x: x_train, y: y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x: x_train, y: y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
    print("Optimization Complete!")

    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x: x_test, y: y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x: x_test, y: y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
And the output is below:
Epoch: 000 Training Accuracy: 0.31500 cost= 62297.6406250000
Epoch: 050 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 100 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 150 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 200 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 250 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 300 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 350 Training Accuracy: 0.30722 cost= 433502.8125000000
...
Epoch: 5000 Training Accuracy: 0.30722 cost= 433502.8125000000
As shown above, the training accuracy stays essentially the same throughout the training process. I varied the number of hidden layers and the learning rate (0.001, 0.01, 0.1), and the trend was still the same.
I'd appreciate some help with figuring out what I may be doing wrong.
The main problem with your code is that you are not using mini-batch gradient descent; instead you are using the whole training set for every gradient-descent update. Additionally, 5000 epochs is probably far too many; I'd guess 50-100 will be enough (you can verify by experiment). Also, of the following two lines, the second is redundant: you are running the graph twice per iteration when you only need to run it once:
sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
Correct form:
_, avg_cost = sess.run([optimizer, cost], feed_dict={x: x_train, y: y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
The following is the modified code (I have added the comment # ADDED # at the end of each line I added):
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
                                       allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

batch_size = 100                              # ADDED #
num_batches = x_train.shape[0] // batch_size  # ADDED #

learning_rate = 0.001
training_epochs = 5000
display_step = 1
num_input = x_train.shape[1]
num_classes = y_train.shape[1]

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.relu(tf.matmul(X, w_h))
    h = tf.nn.dropout(h, p_keep_hidden)
    h2 = tf.nn.relu(tf.matmul(h, w_h2))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    h3 = tf.nn.relu(tf.matmul(h2, w_h3))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    return tf.nn.sigmoid(tf.matmul(h3, w_o))

x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])

w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])

p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")

pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)

cost = -tf.reduce_sum(((y * tf.log(pred + 1e-9)) + ((1 - y) * tf.log(1 - pred + 1e-9))), name='xentropy')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.local_variables_initializer())
    for epoch in range(training_epochs):
        for i in range(num_batches):                               # ADDED #
            indices = range(i * batch_size, (i + 1) * batch_size)  # ADDED #
            _, avg_cost = sess.run([optimizer, cost],
                                   feed_dict={x: x_train[indices], y: y_train[indices],
                                              p_keep_input: 1.0, p_keep_hidden: 1.0})  # ADDED #
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x: x_train, y: y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
    print("Optimization Complete!")

    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x: x_test, y: y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x: x_test, y: y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
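As a side note grounded in the question's own commented-out cost line: if the network returned raw logits (dropping the final tf.nn.sigmoid), the numerically stable built-in loss tf.nn.sigmoid_cross_entropy_with_logits could replace the hand-written cross entropy. A minimal sketch, assuming a hypothetical model_logits variant that omits the last sigmoid:

# Hypothetical variant of model() that returns raw logits (no final sigmoid).
def model_logits(X, w_h, w_h2, w_h3, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.dropout(tf.nn.relu(tf.matmul(X, w_h)), p_keep_hidden)
    h2 = tf.nn.dropout(tf.nn.relu(tf.matmul(h, w_h2)), p_keep_hidden)
    h3 = tf.nn.dropout(tf.nn.relu(tf.matmul(h2, w_h3)), p_keep_hidden)
    return tf.matmul(h3, w_o)

logits = model_logits(x, w_h, w_h2, w_h3, w_o, p_keep_input, p_keep_hidden)
# Per-label sigmoid cross entropy, as in the commented-out line in the question.
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y))
pred = tf.nn.sigmoid(logits)  # probabilities, if still needed for evaluation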