I have the code below for a multilabel classification:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)
#.........................................................................
learning_rate = 0.001
training_epochs = 5000
display_step = 50
num_input = x_train.shape[1]
num_classes = y_train.shape[1]
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.relu(tf.matmul(X, w_h))
    h = tf.nn.dropout(h, p_keep_hidden)
    h2 = tf.nn.relu(tf.matmul(h, w_h2))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    h3 = tf.nn.relu(tf.matmul(h2, w_h3))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    return tf.nn.sigmoid(tf.matmul(h3, w_o))
x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])
w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])
p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)
#cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))
cost = -tf.reduce_sum( ( (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) ) , name='xentropy' )
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
#optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
#--------------------------------------------------------------------------------
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.local_variables_initializer())
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
    print("Optimization Complete!")
    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
And the output is below:
Epoch: 000 Training Accuracy: 0.31500 cost= 62297.6406250000
Epoch: 050 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 100 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 150 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 200 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 250 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 300 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 350 Training Accuracy: 0.30722 cost= 433502.8125000000
...
Epoch: 5000 Training Accuracy: 0.30722 cost= 433502.8125000000
As shown above, the training accuracy stays essentially the same throughout training. I varied the number of hidden layers and tried learning rates of 0.001, 0.01, and 0.1, and the trend was the same.
I'd appreciate some help on what I may be doing wrong.
The main problem with your code is that you are not using mini-batch gradient descent; instead you use the whole training set for every gradient update. In addition, 5000 epochs is probably far too many; I would guess 50-100 will be enough (you can verify by experiment). Also, in the following lines the second call is redundant: you are running the graph twice per iteration when you only need to run it once:
sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
Correct form:
_, avg_cost = sess.run([optimizer, cost], feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
The following is the modified code (the lines I added are marked with an # ADDED # comment at the end):
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)
batch_size = 100 # ADDED #
num_batches = x_train.shape[0] // batch_size # ADDED #
learning_rate = 0.001
training_epochs = 5000
display_step = 1
num_input = x_train.shape[1]
num_classes = y_train.shape[1]
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    h = tf.nn.relu(tf.matmul(X, w_h))
    h = tf.nn.dropout(h, p_keep_hidden)
    h2 = tf.nn.relu(tf.matmul(h, w_h2))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    h3 = tf.nn.relu(tf.matmul(h2, w_h3))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    return tf.nn.sigmoid(tf.matmul(h3, w_o))
x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])
w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])
p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)
cost = -tf.reduce_sum( ( (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) ) , name='xentropy' )
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(tf.local_variables_initializer())
    for epoch in range(training_epochs):
        for i in range(num_batches):  # ADDED #
            indices = range(i*batch_size, (i+1)*batch_size)  # ADDED #
            _, avg_cost = sess.run([optimizer, cost], feed_dict = {x : x_train[indices], y : y_train[indices], p_keep_input: 1.0, p_keep_hidden: 1.0})  # ADDED #
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
    print("Optimization Complete!")
    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
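One further tweak worth trying (a sketch under the same setup, not required for the fix above): shuffle the training set at the start of each epoch before slicing mini-batches, which usually helps SGD converge.
for epoch in range(training_epochs):
    # Reshuffle so each epoch sees the mini-batches in a different order.
    perm = np.random.permutation(x_train.shape[0])
    x_shuf, y_shuf = x_train[perm], y_train[perm]
    for i in range(num_batches):
        batch_x = x_shuf[i * batch_size:(i + 1) * batch_size]
        batch_y = y_shuf[i * batch_size:(i + 1) * batch_size]
        _, avg_cost = sess.run([optimizer, cost],
                               feed_dict={x: batch_x, y: batch_y,
                                          p_keep_input: 1.0, p_keep_hidden: 1.0})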
I'm quite new to deep learning and this is almost my first piece of code.
My problem is that I get almost 100 percent training accuracy, but my testing accuracy is only about 0.12.
I tried adding regularization because I thought overfitting was the problem, but that may not be it: after I added some dropout and regularization, the testing accuracy dropped.
Am I not using the variables that I trained when I compute the testing accuracy?
Thanks a lot.
x = tf.placeholder(tf.float32, shape = [None, 128, 18, 1])
y = tf.placeholder(tf.float32, shape = [None, 6])
is_train = tf.placeholder(tf.bool, name = 'is_train')
keep_prob = tf.placeholder(tf.float32)
y_pred, logits = CNN_model_2(x, 6)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = logits))
regularization_losses = tf.losses.get_regularization_losses()
loss = tf.add_n([loss] + regularization_losses)
optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss)
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
save_dir = 'D:\AMD\Only_oneoverten_sec\Log'
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        batch = next_batch(50, training_features, training_labels)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict = {x: batch[0], y: batch[1], keep_prob: 1.0})
            loss_print = loss.eval(feed_dict = {x: batch[0], y: batch[1], keep_prob: 1.0})
            print("Epoch: %d, Training Set Accuracy: %f, Loss: %f" % (i, train_accuracy, loss_print))
        sess.run(optimizer, feed_dict = {x: batch[0], y: batch[1], keep_prob: 0.8})
    saver.save(sess, os.path.join(save_dir, 'CNN_model'), global_step = 1000)

    test_accuracy = 0
    for i in range(20):
        test_batch = next_batch(20, testing_features, testing_labels)
        test_accuracy = test_accuracy + accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
        test_accuracy = test_accuracy / 20
    print("Test accuracy: %f" % test_accuracy)
Yes, you are using the trained variables, but your accuracy computation is wrong in these lines:
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    test_accuracy = test_accuracy + accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy = test_accuracy / 20
Change it like this:
test_accuracy = 0
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    acc = accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    test_accuracy += acc / 20
or:
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    test_accuracy = test_accuracy + accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
test_accuracy = test_accuracy / 20
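Either way works when every test batch has the same size. If the batches can differ in size, a slightly more robust sketch (assuming the same next_batch helper and accuracy tensor as above) is to weight each batch by its size:
# Weight each batch's accuracy by its size so unequal batches are handled correctly.
total_correct = 0.0
total_seen = 0
for i in range(20):
    test_batch = next_batch(20, testing_features, testing_labels)
    batch_acc = accuracy.eval(feed_dict = {x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
    total_correct += batch_acc * len(test_batch[0])
    total_seen += len(test_batch[0])
print("Test accuracy: %f" % (total_correct / total_seen))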
I am building a convolutional neural network for classifying MNIST data. I am using 2 conv layers and 2 fully connected layers.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
def _net_params():
    weights = {
        'conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
        'conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
        'fc1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
        'fc2': tf.Variable(tf.random_normal([1024, 10])),
    }
    biases = {
        'conv1': tf.Variable(tf.random_normal([32]), tf.float32),
        'conv2': tf.Variable(tf.random_normal([64]), tf.float32),
        'fc1': tf.Variable(tf.random_normal([1024]), tf.float32),
        'fc2': tf.Variable(tf.random_normal([10]), tf.float32),
    }
    return weights, biases
def _fc_layer(inputs, weights, biases):
    return tf.add(tf.matmul(inputs, weights), biases)

def _conv_layer(inputs, weights, biases, stride=1, padding='SAME'):
    layer = tf.nn.conv2d(input=inputs, filter=weights,
                         strides=[1, stride, stride, 1], padding=padding)
    layer = tf.nn.bias_add(layer, biases)
    return tf.nn.relu(layer)

def pool_layer(inputs):
    pool = tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding="SAME")
    return pool
def conv_net(x):
    weights, biases = _net_params()
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    # Conv layers
    conv1 = _conv_layer(x, weights['conv1'], biases['conv1'])
    pool1 = pool_layer(conv1)
    conv2 = _conv_layer(pool1, weights['conv2'], biases['conv2'])
    pool2 = pool_layer(conv2)
    flattened = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = _fc_layer(flattened, weights['fc1'], biases['fc1'])
    fc1 = tf.nn.relu(fc1)
    fc2 = _fc_layer(fc1, weights['fc2'], biases['fc2'])
    return fc2
def _training():
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    learning_rate_ = tf.placeholder(tf.float32)
    pred = conv_net(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate_).minimize(cost)
    # optimizer = tf.train.GradientDescentOptimizer(
    #     learning_rate=learning_rate_).minimize(cost)
    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return x, y_, learning_rate_, optimizer, cost, accuracy
def main():
    mnist = input_data.read_data_sets('tmp/data', one_hot=True)
    n_epochs = 3
    batch_size = 200
    learning_rate = 0.005
    x, y_, learning_rate_, optimizer, cost, accuracy = _training()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        current_epoch = 0
        while current_epoch < n_epochs:
            current_epoch += 1
            print('epoch %s' % (current_epoch,))
            current_batch = 1
            while current_batch * batch_size <= len(mnist.train.images):
                current_batch += 1
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                sess.run(fetches=optimizer, feed_dict={x: batch_x,
                         y_: batch_y, learning_rate_: learning_rate, })
                if current_batch % 75 == 0:
                    loss, acc = sess.run([cost, accuracy], feed_dict=
                        {x: batch_x, y_: batch_y, learning_rate_: 0., })
                    print(' batch %s: batch_loss=%s, training_accuracy=%s'
                          % (current_batch, loss, acc,))
        print('Training complete !')
        print('Final accuracy is %s' % sess.run(accuracy, feed_dict=
            {x: mnist.test.images, y_: mnist.test.labels, learning_rate_: 0.}))

if __name__ == '__main__':
    main()
(There may be some indentation errors from pasting the code here on Stack Overflow.)
When I use AdamOptimizer, I get an accuracy above 95%.
Accuracy for AdamOptimizer
But when I use GradientDescentOptimizer, I get an accuracy of only about 10%.
Accuracy for GradientDescentOptimizer
Do you know why I am getting this lower accuracy, and how to fix it if I want to use GradientDescentOptimizer?
Thanks
My task is to produce saliency maps with a VGG-based network.
But the MSE loss won't decrease the way I expect, and I cannot figure out why.
P.S. The training dataset is SALICON.
Here's the output:
training epoch 1, loss value is 0.041423566639423
training epoch 2, loss value is 0.041423123329878
training epoch 3, loss value is 0.041430559009314
training epoch 4, loss value is 0.041424177587032
...
training epoch 20, loss value is 0.041416928172112
I have tried changing the optimizer, the learning rate, and the loss function, but nothing has helped.
Here's my code:
def shuffle(photo, grdtr, shuffle=True):
    idx = np.arange(0, len(photo))
    if shuffle:
        np.random.shuffle(idx)
    photo_shuffle = [photo[i] for i in idx]
    grdtr_shuffle = [grdtr[i] for i in idx]
    return np.asarray(photo_shuffle), np.asarray(grdtr_shuffle)
if __name__ == '__main__':
    # create the model
    x = tf.placeholder(tf.float32, [None, 48, 64, 3])
    y_ = tf.placeholder(tf.float32, [None, 48 * 64, 1])
    h = tf.placeholder(tf.float32, [None, 48, 64, 1])
    y = deepnn(x)
    # define loss and optimizer
    # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=(tf.clip_by_value(y,1e-8,tf.reduce_max(y))), labels=y_))
    y_ = tf.nn.softmax(y_, dim=1)
    cross_entropy = tf.reduce_mean(tf.pow(y - y_, 2))
    # cross_entropy = tf.reduce_mean(y_ * tf.log(y_ / y))  # KL
    tf.summary.scalar('loss', cross_entropy)
    train_step = tf.train.AdamOptimizer(learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999).minimize(cross_entropy)
    # do the training
    with tf.Session() as sess:
        ...
        # load the training data
        photos = np.load('./data/npy/SALICON100/photos_queue.npy')
        grdtrs = np.load('./data/npy/SALICON100/grdtrs_queue.npy')
        photos = photos / 255.0
        grdtrs = grdtrs / 255.0
        EPOCH = 20
        BATCH_SIZE = 20
        TRAINING_SET_SIZE = 20
        for j in range(EPOCH):
            # photos, grdtrs = shuffle(photos, grdtrs, shuffle=False)
            grdtrs = np.resize(grdtrs, [TRAINING_SET_SIZE, 48, 64, 20])
            grdtrs = np.reshape(grdtrs, [TRAINING_SET_SIZE, 48 * 64, 20])
            _, loss_value, pred_y = sess.run([train_step, cross_entropy, y], feed_dict={x: photos[:20], y_: grdtrs[:20]})
            if (j + 1) % 1 == 0:
                print('training epoch %d, loss value is %.15f' % (j + 1, loss_value))
        np.save('./data/20_photos_test/net_output.npy', pred)
        np.save('./data/20_photos_test/net_grdtrs.npy', grdtrs[:20])
        # stop the queue threads and properly close the session
        ...
And here are some of the lines defining the tensors used in the session:
x = tf.placeholder(tf.float32, [None, 48, 64, 3])
y_ = tf.placeholder(tf.float32, [None, 48 * 64, 1])
y = deepnn(x)
cross_entropy = tf.reduce_sum(tf.pow(y-y_sm,2))
In the code you posted, you never actually run your train step. You need to call something along the lines of sess.run(train_step, feed_dict=...) to actually train your network. If you do not train your network, the loss obviously will not decrease.
Also, are you sure you want to apply softmax to your labels?
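If the goal is plain per-pixel regression on the saliency maps, one option (a sketch, not tested on this model) is to drop the softmax on the labels and regress directly against the [0, 1]-scaled ground truth, reusing deepnn from the question:
# Sketch: plain MSE regression against the normalized ground-truth maps,
# without applying tf.nn.softmax to the labels.
x = tf.placeholder(tf.float32, [None, 48, 64, 3])
y_ = tf.placeholder(tf.float32, [None, 48 * 64, 1])   # ground truth already scaled to [0, 1]
y = deepnn(x)                                          # network from the question
mse_loss = tf.reduce_mean(tf.squared_difference(y, y_))
train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(mse_loss)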
I am trying to get a ROC curve and a confusion matrix in TensorFlow. I used the sklearn.metrics functions and I am getting an error. My code is below:
from sklearn.metrics import roc_curve, auc
n_inputs = x_train.shape[1]
n_hidden1 = 500
n_hidden2 = 200
n_outputs = 2
learning_rate = 0.01
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
hidden1 = tf.layers.dense(X, n_hidden1, activation=None)
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=None)
logits = tf.layers.dense(hidden2, n_outputs)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
training_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
n_epochs = 20
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        sess.run(training_op, feed_dict={X: x_train, y: y_train})
        acc_train = accuracy.eval(feed_dict={X: x_train, y: y_train})
        acc_test = accuracy.eval(feed_dict={X: x_test, y: y_test})
        print("Epoch:", epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
    y_score = np.array(logits)
    roc_curve(y_test, y_score)
The error I got is below:
TypeError: Singleton array array(<tf.Tensor 'dense_26/BiasAdd:0' shape=(?, 2) dtype=float32>, dtype=object) cannot be considered a valid collection.
Any help will be appreciated. Thanks!
When you call
sess.run(training_op, feed_dict={X: x_train, y: y_train})
you need to request that the network also return the value of the logits tensor; change it to this:
training_op_result, logits_result = sess.run([training_op, logits], feed_dict={X: x_train, y: y_train})
y_score = np.array(logits_result)
roc_curve(y_test, y_score)
A tensor is a graph object; you access the value/result of a tensor or computation through sess.run.
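As a follow-up (a sketch, assuming the binary setup from the question, that numpy is imported as np, and that you are still inside the session): sklearn's roc_curve expects a score for the positive class, and the scores should come from the same split as the labels, so you would typically evaluate the logits on the test set and pass the probability of class 1:
# Evaluate logits on the test set, convert to class probabilities with a
# numerically stable softmax, and feed the positive-class column to roc_curve.
logits_test = sess.run(logits, feed_dict={X: x_test})
shifted = logits_test - logits_test.max(axis=1, keepdims=True)
probs_test = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
fpr, tpr, thresholds = roc_curve(y_test, probs_test[:, 1])
print("Test AUC:", auc(fpr, tpr))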
TensorFlow has a function to calculate AUC: tf.metrics.auc(). Here is a section of my code trying to compute the AUC:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 0.8, p_keep_hidden: 0.5})
        avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 0.8, p_keep_hidden: 0.5})
        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.5f}".format(avg_cost))
    print("Optimization Done!")
    roc_score = tf.metrics.auc(y, pred)
    roc_score = tf.convert_to_tensor(roc_score)
    print(roc_score.eval({x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
A section of the error I get is below; the entire error is quite lengthy:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value auc_4/false_positives
[[Node: auc_4/false_positives/read = Identity[T=DT_FLOAT, _class=["loc:#auc_4/false_positives"], _device="/job:localhost/replica:0/task:0/cpu:0"](auc_4/false_positives)]]
I'd appreciate any pointers on how to resolve this. Thanks
It might be too late now, but if you haven't found the solution yet, try this change:
# Create the metric op first, so its local variables exist before they are initialized.
_, roc_score = tf.metrics.auc(y, pred)
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
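For completeness: tf.metrics.auc returns a (value, update_op) pair backed by local variables, so the usual pattern is to create the metric, initialize the local variables, run the update op over your data, and then read the value. A sketch assuming the same placeholders and pred tensor as above:
# Create the metric before initializing local variables.
auc_value, auc_update = tf.metrics.auc(labels=y, predictions=pred)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    # ... training loop ...
    sess.run(auc_update, feed_dict={x: x_test, y: y_test,
                                    p_keep_input: 1.0, p_keep_hidden: 1.0})
    print("Test AUC:", sess.run(auc_value))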