training CNN cpu in 100% but failed - tensorflow

I'm training a CNN model to recognize MNIST datasets, when i run this code, my IDE became unresponsive, i review many times but can't find where is wrong.Here is the code:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
def weight(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
sess = tf.InteractiveSession()
W_conv1 = weight([5, 5, 1, 32])
b_conv1 = bias([32])
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight([5, 5, 32, 64])
b_conv2 = bias([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight([7*7*64, 1024])
b_fc1 = bias([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight([1024, 10])
b_fc2 = bias([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_conv, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
train_accuracy = accuracy.eval({x: batch[0], y_: batch[1], keep_prob: 1.0})
print("step %d, training accuracy %g" % (i, train_accuracy))
train_step.run({x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy %g" % accuracy.eval({x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
i can't copy the running information, my cpu worked 100%.
it can just print "step 100, 0.12",and then no thing happened.

Related

Unable to get accuracy beyond 30% for MNIST expert example Tensorflow

I followed the MNIST experts example on tensorflow website, but having real difficulty getting anywhere close to the figure suggested by the website.
With 20k iterations, batch size 50, and either GPU/CPU used as device I hardly get 30% accuracy.
Can you please help?
def main(args):
mnist = input_data.read_data_sets("/MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv_2d(x_image, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv_2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# Densely Connected Layer
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdadeltaOptimizer(0.0001).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
config = tf.ConfigProto(device_count={'GPU': 0})
with tf.Session(config=config) as sess:
sess.run(tf.global_variables_initializer())
for i in range(20000):
batch = mnist.train.next_batch(50)
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1})
if i % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x: batch[0], y_: batch[1], keep_prob: 1.0
})
print("step %d, training accuracy %g" %(i, train_accuracy))
print("test accuracy %g" %accuracy.eval(feed_dict={
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
if name == 'main':
tf.app.run(main = main)

Tensorflow MNIST: ValueError: Shape must be rank 4 but is rank 1 for 'Conv2D' (op: 'Conv2D') with input shapes: [?,28,28,1], [4]

I'm new to machine learning and tensorflow. I started by following the MNIST tutorial on the tensorflow site. I got the simple version to work, but when I was following along with the deep CNN, I found an error.
ValueError: Shape must be rank 4 but is rank 1 for 'Conv2D' (op:
'Conv2D') with input shapes: [?,28,28,1], [4].
The problem seems to lie in the line:
x_image = tf.reshape(x, [-1, 28, 28, 1])
Thanks for any help, I'm a bit lost here.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNST_data/", one_hot=True)
import tensorflow as tf
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
y_ = tf.placeholder(tf.float32, [None, 10])
#improvements
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
#layer 1
W_conv1 = ([5,5,1,32])
b_conv1 = ([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
#layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#fully connected layer
W_fc1 = weight_variable([3136, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 3136])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
#dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#readout, similar to softmax
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#optimization
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
#training
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
#evaluate
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
#the session
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i%100==0:
training_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
print("step: %i accuracy: %a" % (i, training_accuracy))
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy: %s" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
Your error is in your 1st convolutional layer - your variables W_conv1 and b_conv1 are just lists (hence rank 1) since you have not used the weight_variable() and bias_variable() functions that you created!
Might be relevant.
This error at least misleading and confusing for me.
As per the error its asking us to check "input shapes", whereas exactly the issue is in filters that you have specified.
That's why #Yuji asking above to use method weight_variable(), which is properly initializing the filters(weights).

Why is the loss taking multiple values for one epoch?

I am wondering about the smoothing option of tensorboard. When I set smoothing to 0, I get the following plot of the loss:
This seems strange to me, because I think I calculate the loss only once for each epoch. Why does it have multiple values for one epoch?
Code
The following code with tensorboard --logdir summary_dir created the image:
#!/usr/bin/env python
"""MNIST with Tensorflow."""
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import os
import numpy as np
epochs = 20000
model_checkpoint_path = 'checkpoints/mnist_tf_model.ckpt'
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.0001)
return tf.get_variable(initializer=initial, name='weights')
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.get_variable(initializer=initial, name='biases')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
def eval_network(sess, summary_writer, dataset, correct_prediction, epoch):
correct_sum = 0
total_test = 0
training_summary = tf.get_default_graph().get_tensor_by_name("training_accuracy:0")
loss_summary = tf.get_default_graph().get_tensor_by_name("loss:0")
for i in range(dataset.labels.shape[0] / 1000):
feed_dict = {x: dataset.images[i * 1000:(i + 1) * 1000],
y_: dataset.labels[i * 1000:(i + 1) * 1000],
keep_prob: 1.0}
[test_correct, train_summ, loss_summ] = sess.run([correct_prediction,
training_summary,
loss_summary],
feed_dict=feed_dict)
summary_writer.add_summary(train_summ, epoch)
summary_writer.add_summary(loss_summ, epoch)
test_correct = correct_prediction.eval(feed_dict=feed_dict)
correct_sum += sum(test_correct)
total_test += len(test_correct)
return float(correct_sum) / total_test
def log_score(sess, summary_writer, filename, mnist, scoring, epoch):
with open(filename, "a") as myfile:
train = eval_network(sess, summary_writer, mnist.train, scoring, epoch)
test = eval_network(sess, summary_writer, mnist.test, scoring, epoch)
myfile.write("%i;%0.6f;%0.6f\n" % (epoch, train, test))
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
with tf.Session() as sess:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])
with tf.variable_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1, name='ReLU1')
h_pool1 = max_pool_2x2(h_conv1)
with tf.variable_scope('conv2') as scope:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='ReLU2')
h_pool2 = max_pool_2x2(h_conv2)
with tf.variable_scope('fc1'):
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
with tf.variable_scope('dropout'):
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
with tf.variable_scope('softmax'):
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv),
reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.scalar_summary("training_accuracy", accuracy, name="training_accuracy")
tf.scalar_summary("loss", cross_entropy, name="loss")
summary_writer = tf.train.SummaryWriter('summary_dir', sess.graph)
sess.run(tf.initialize_all_variables())
for i in range(epochs):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
log_score(sess, summary_writer,
'validation-curve-accuracy.csv',
mnist, correct_prediction, i)
train_step.run(feed_dict={x: batch[0],
y_: batch[1],
keep_prob: 0.5})
log_score(sess, summary_writer, 'validation-curve-accuracy.csv',
mnist, correct_prediction, epochs)
log_score evaluates the summaries and it's called every 100 minibatches.

Python TensorFlow: ValueError: setting an array element with a sequence

I'm trying to run a 2-layer convolutional net for digit recognition on MNIST data-set using Tensorflow in Python3.5. The input is from csv files which I have read in as pandas dataframe. Tensorflow doesn't like pandas dataframe (it didn't accept the input), so I changed it to a numpy array. The following is the entire code-
sess=tf.InteractiveSession()
train=pd.read_csv('train (1).csv',sep=',',header=0,dtype='float32')
x_train=train.iloc[:,1:]
y_train=train.iloc[:,0]
onehot=OneHotEncoder()
y_train=y_train.reshape(-1,1)
y_train=onehot.fit_transform(y_train)
test=pd.read_csv('test.csv',sep=',',header=0)
x=tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
W=tf.Variable(tf.zeros([784,10]))
b=tf.Variable(tf.zeros([10]))
y=tf.nn.softmax(tf.matmul(x,W) +b )
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_train= x_train.as_matrix()
x_image = tf.reshape(x_train, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
sess.run(tf.initialize_all_variables())
k=0
for i in range(20000):
x_batch = x_train[k*100:k+100,:]
y_batch = y_train[k*100:k+100,:]
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x:x_batch, y_: y_batch, keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={x: x_batch, y_: y_batch, keep_prob: 0.5})
k+=1
The error which I'm getting is in the accuracy.eval function-
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
The following is the error message-
ValueError: setting an array element with a sequence.
I tried looking up why the error occurs, and there were several reasons for it. However, my input is in the form of an array and I'm not really familiar with tensors, so I'm trouble understanding what is going wrong.
Any help is appreciated.

Tensorflow Incompatible Shapes Error in Tutorial

I've been trying to create the convolutional network from the Tensorflow tutorial, but I've been having trouble. For some reason, I'm getting errors where the size of y_conv is 4x larger than the size of y_, and I have no idea why. I found this question, but it appears to be a different problem than mine, though it looks similar.
To be clear, the batch size in the below code is 50, but the error it's coming up with is
tensorflow.python.framework.errors.InvalidArgumentError: Incompatible shapes: [200] vs. [50]
and when I change the batch size to 10, I get
tensorflow.python.framework.errors.InvalidArgumentError: Incompatible shapes: [40] vs. [10]
so it's related to the batch size somehow, but I can't figure that out. Can anybody tell me what's wrong with this code? It's pretty much straight from the tutorial linked above.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
import tensorflow as tf
sess = tf.InteractiveSession()
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides = [1, 2, 2, 1], padding='SAME')
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_conv1, w_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
w_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
sess.run(tf.initialize_all_variables())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
The reshapes with the -1's are clues. It's not the batch size that's wrong it's the image size. You're flattening it out into the batch dimension.
Why is the image the wrong size?
On the second conv you're passing conv1 instead of pool1
conv2d(h_conv1, w_conv2).
Personally for pipelines like this I like to use 1 name for the data as it flows through.
Start using a debugger, it's worth it!