I am wondering about the smoothing option of tensorboard. When I set smoothing to 0, I get the following plot of the loss:
This seems strange to me, because I think I calculate the loss only once for each epoch. Why does it have multiple values for one epoch?
Code
The following code with tensorboard --logdir summary_dir created the image:
#!/usr/bin/env python
"""MNIST with Tensorflow."""
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import os
import numpy as np
epochs = 20000
model_checkpoint_path = 'checkpoints/mnist_tf_model.ckpt'
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.0001)
return tf.get_variable(initializer=initial, name='weights')
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.get_variable(initializer=initial, name='biases')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
def eval_network(sess, summary_writer, dataset, correct_prediction, epoch):
correct_sum = 0
total_test = 0
training_summary = tf.get_default_graph().get_tensor_by_name("training_accuracy:0")
loss_summary = tf.get_default_graph().get_tensor_by_name("loss:0")
for i in range(dataset.labels.shape[0] / 1000):
feed_dict = {x: dataset.images[i * 1000:(i + 1) * 1000],
y_: dataset.labels[i * 1000:(i + 1) * 1000],
keep_prob: 1.0}
[test_correct, train_summ, loss_summ] = sess.run([correct_prediction,
training_summary,
loss_summary],
feed_dict=feed_dict)
summary_writer.add_summary(train_summ, epoch)
summary_writer.add_summary(loss_summ, epoch)
test_correct = correct_prediction.eval(feed_dict=feed_dict)
correct_sum += sum(test_correct)
total_test += len(test_correct)
return float(correct_sum) / total_test
def log_score(sess, summary_writer, filename, mnist, scoring, epoch):
with open(filename, "a") as myfile:
train = eval_network(sess, summary_writer, mnist.train, scoring, epoch)
test = eval_network(sess, summary_writer, mnist.test, scoring, epoch)
myfile.write("%i;%0.6f;%0.6f\n" % (epoch, train, test))
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
with tf.Session() as sess:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])
with tf.variable_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1, name='ReLU1')
h_pool1 = max_pool_2x2(h_conv1)
with tf.variable_scope('conv2') as scope:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='ReLU2')
h_pool2 = max_pool_2x2(h_conv2)
with tf.variable_scope('fc1'):
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
with tf.variable_scope('dropout'):
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
with tf.variable_scope('softmax'):
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv),
reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.scalar_summary("training_accuracy", accuracy, name="training_accuracy")
tf.scalar_summary("loss", cross_entropy, name="loss")
summary_writer = tf.train.SummaryWriter('summary_dir', sess.graph)
sess.run(tf.initialize_all_variables())
for i in range(epochs):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
log_score(sess, summary_writer,
'validation-curve-accuracy.csv',
mnist, correct_prediction, i)
train_step.run(feed_dict={x: batch[0],
y_: batch[1],
keep_prob: 0.5})
log_score(sess, summary_writer, 'validation-curve-accuracy.csv',
mnist, correct_prediction, epochs)
log_score evaluates the summaries and it's called every 100 minibatches.
Related
I'm training a CNN model to recognize MNIST datasets, when i run this code, my IDE became unresponsive, i review many times but can't find where is wrong.Here is the code:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
def weight(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
sess = tf.InteractiveSession()
W_conv1 = weight([5, 5, 1, 32])
b_conv1 = bias([32])
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight([5, 5, 32, 64])
b_conv2 = bias([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight([7*7*64, 1024])
b_fc1 = bias([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight([1024, 10])
b_fc2 = bias([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_conv, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
train_accuracy = accuracy.eval({x: batch[0], y_: batch[1], keep_prob: 1.0})
print("step %d, training accuracy %g" % (i, train_accuracy))
train_step.run({x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy %g" % accuracy.eval({x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
i can't copy the running information, my cpu worked 100%.
it can just print "step 100, 0.12",and then no thing happened.
I'm new to machine learning and tensorflow. I started by following the MNIST tutorial on the tensorflow site. I got the simple version to work, but when I was following along with the deep CNN, I found an error.
ValueError: Shape must be rank 4 but is rank 1 for 'Conv2D' (op:
'Conv2D') with input shapes: [?,28,28,1], [4].
The problem seems to lie in the line:
x_image = tf.reshape(x, [-1, 28, 28, 1])
Thanks for any help, I'm a bit lost here.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNST_data/", one_hot=True)
import tensorflow as tf
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
y_ = tf.placeholder(tf.float32, [None, 10])
#improvements
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
#layer 1
W_conv1 = ([5,5,1,32])
b_conv1 = ([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
#layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#fully connected layer
W_fc1 = weight_variable([3136, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 3136])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
#dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#readout, similar to softmax
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#optimization
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
#training
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
#evaluate
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
#the session
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i%100==0:
training_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
print("step: %i accuracy: %a" % (i, training_accuracy))
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy: %s" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
Your error is in your 1st convolutional layer - your variables W_conv1 and b_conv1 are just lists (hence rank 1) since you have not used the weight_variable() and bias_variable() functions that you created!
Might be relevant.
This error at least misleading and confusing for me.
As per the error its asking us to check "input shapes", whereas exactly the issue is in filters that you have specified.
That's why #Yuji asking above to use method weight_variable(), which is properly initializing the filters(weights).
I'm trying to run a 2-layer convolutional net for digit recognition on MNIST data-set using Tensorflow in Python3.5. The input is from csv files which I have read in as pandas dataframe. Tensorflow doesn't like pandas dataframe (it didn't accept the input), so I changed it to a numpy array. The following is the entire code-
sess=tf.InteractiveSession()
train=pd.read_csv('train (1).csv',sep=',',header=0,dtype='float32')
x_train=train.iloc[:,1:]
y_train=train.iloc[:,0]
onehot=OneHotEncoder()
y_train=y_train.reshape(-1,1)
y_train=onehot.fit_transform(y_train)
test=pd.read_csv('test.csv',sep=',',header=0)
x=tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
W=tf.Variable(tf.zeros([784,10]))
b=tf.Variable(tf.zeros([10]))
y=tf.nn.softmax(tf.matmul(x,W) +b )
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_train= x_train.as_matrix()
x_image = tf.reshape(x_train, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
sess.run(tf.initialize_all_variables())
k=0
for i in range(20000):
x_batch = x_train[k*100:k+100,:]
y_batch = y_train[k*100:k+100,:]
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x:x_batch, y_: y_batch, keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={x: x_batch, y_: y_batch, keep_prob: 0.5})
k+=1
The error which I'm getting is in the accuracy.eval function-
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
The following is the error message-
ValueError: setting an array element with a sequence.
I tried looking up why the error occurs, and there were several reasons for it. However, my input is in the form of an array and I'm not really familiar with tensors, so I'm trouble understanding what is going wrong.
Any help is appreciated.
i am new to cnn and to tensorflow, so i tried to adapt their example of cnn that they run on the mnist dataset onto the SVHN dataset. the images are 32x32 instead of 28x28 and they have 3 color channels instead of one. I get an error message :
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
I cannot yet make heads or tails of the whole thing. Maybe someone spots a few glaring mistakes that i surely made. I encourage you to be brutal :)
Here is my code in its entirety :
import urllib
import os.path
import numpy as np
testfile = urllib.URLopener()
testfile2=urllib.URLopener()
import scipy.io as scp
if not os.path.isfile("test.mat"):
testfile.retrieve("http://ufldl.stanford.edu/housenumbers/test_32x32.mat", "test.mat")
if not os.path.isfile("train.mat"):
testfile.retrieve("http://ufldl.stanford.edu/housenumbers/train_32x32.mat", "train.mat")
testdata=scp.loadmat('test.mat')
traindata=scp.loadmat('train.mat')
trainDataX = traindata['X']
trainDataY = traindata['y']
testDataX = testdata['X']
testDataY = testdata['y']
def OnehotEndoding(Y):
Ytr=[]
for el in Y:
temp=np.zeros(10)
if el==10:
temp[0]=1
elif el==1:
temp[1]=1
elif el==2:
temp[2]=1
elif el==3:
temp[3]=1
elif el==4:
temp[4]=1
elif temp[5]==1:
temp[5]=1
elif temp[6]==1:
temp[6]=1
elif temp[7]==1:
temp[7]=1
elif temp[8]==1:
temp[8]=1
elif temp[9]==1:
temp[9]=1
Ytr.append(temp)
return np.asarray(Ytr)
trainDataY = OnehotEndoding(trainDataY)
testDataY = OnehotEndoding(testDataY)
def transposeArray(data):
print 'started'
xtrain = []
trainLen = data.shape[3]
print trainLen
for x in xrange(trainLen):
xtrain.append(data[:,:,:,x])
xtrain = np.asarray(xtrain)
return xtrain
trainDataX = transposeArray(trainDataX)
testDataX = transposeArray(testDataX)
print trainDataX.shape
import tensorflow as tf
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 32,32,3])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,32,32,3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
x = tf.placeholder(tf.float32, shape=[None, 32,32,3])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
epoch=10000
batch_size=100
sess.run(tf.initialize_all_variables())
p = np.random.permutation(range(len(trainDataX)))
trX, trY = trainDataX[p], trainDataY[p]
print len(trainDataX)
start = 0
end = 0
for step in range(epoch):
start = end
end = start + batch_size
if start >= len(trainDataX):
start = 0
end = start + batch_size
if end >= len(trainDataX):
end = len(trainDataX) - 1
inX, outY = trX[start:end], trY[start:end]
#sess.run(optimizer, feed_dict= {tf_X: inX, tf_Y: outY, keep_prob:0.75})
if step % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={x: inX, y_: outY, keep_prob:1})
print("step %d, training accuracy %g"%(step, train_accuracy))
train_step.run(feed_dict={x: inX, y_: outY, keep_prob: 0.5})
print("test accuracy %g"%accuracy.eval(feed_dict={
x: testDataX, y_:testDataY , keep_prob: 1.0}))
The warning is quite explicit: you didn't pass any value for a required Placeholder.
First, you define twice the placeholders with:
x = tf.placeholder(tf.float32, shape=[None, 32,32,3])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
So TensorFlow expects you to feed 4 values instead of just 2.
You can delete one of them, this will hopefully remove the error.
I have been trying to adapt the mnist cnn tutorial to my SVHN data.
i dont get an error ( besides something getting ignored)
But the training accuracy just goes to NAN after a few batches.
I am new to cnn and to tensorflow, so i am a bit lost why that is.
( imho it cannot be the learning rate, since i useed the Adam optimizer and the doc said it dynamically adapts the learning rate)
Here is my code:
import urllib
import os.path
import numpy as np
testfile = urllib.URLopener()
testfile2=urllib.URLopener()
import scipy.io as scp
if not os.path.isfile("test.mat"):
testfile.retrieve("http://ufldl.stanford.edu/housenumbers/test_32x32.mat", "test.mat")
if not os.path.isfile("train.mat"):
testfile.retrieve("http://ufldl.stanford.edu/housenumbers/train_32x32.mat", "train.mat")
testdata=scp.loadmat('test.mat')
traindata=scp.loadmat('train.mat')
trainDataX = traindata['X']
trainDataY = traindata['y']
testDataX = testdata['X']
testDataY = testdata['y']
def OnehotEndoding(Y):
Ytr=[]
for el in Y:
temp=np.zeros(10)
if el==10:
temp[0]=1
elif el==1:
temp[1]=1
elif el==2:
temp[2]=1
elif el==3:
temp[3]=1
elif el==4:
temp[4]=1
elif temp[5]==1:
temp[5]=1
elif temp[6]==1:
temp[6]=1
elif temp[7]==1:
temp[7]=1
elif temp[8]==1:
temp[8]=1
elif temp[9]==1:
temp[9]=1
Ytr.append(temp)
return np.asarray(Ytr)
trainDataY = OnehotEndoding(trainDataY)
testDataY = OnehotEndoding(testDataY)
def transposeArray(data):
print 'started'
xtrain = []
trainLen = data.shape[3]
print trainLen
for x in xrange(trainLen):
xtrain.append(data[:,:,:,x])
xtrain = np.asarray(xtrain)
return xtrain
trainDataX = transposeArray(trainDataX)
testDataX = transposeArray(testDataX)
print trainDataX.shape
import tensorflow as tf
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 32,32,3])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,32,32,3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([8 * 8* 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 8*8*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
epoch=10000
batch_size=100
sess.run(tf.initialize_all_variables())
p = np.random.permutation(range(len(trainDataX)))
trX, trY = trainDataX[p], trainDataY[p]
print len(trainDataX)
start = 0
end = 0
for step in range(epoch):
start = end
end = start + batch_size
if start >= len(trainDataX):
start = 0
end = start + batch_size
if end >= len(trainDataX):
end = len(trainDataX) - 1
inX, outY = trX[start:end], trY[start:end]
#sess.run(optimizer, feed_dict= {tf_X: inX, tf_Y: outY, keep_prob:0.75})
if step % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={x: inX, y_: outY, keep_prob:1})
print("step %d, training accuracy %g"%(step, train_accuracy))
train_step.run(feed_dict={x: inX, y_: outY, keep_prob: 0.5})
print("test accuracy %g"%accuracy.eval(feed_dict={
x: testDataX, y_:testDataY , keep_prob: 1.0}))
Your cross entropy formula seems wrong. Instead use the built in tf.nn.softmax_cross_entropy_with_logits:
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_))