tensor flow error: logits and labels must be broadcastable - tensorflow

I am having the following error displayed while trying to get tensorflow running:
InvalidArgumentError: logits and labels must be broadcastable: logits_size=[30,2] labels_size=[8,2]
Below is my code. I obtained parts of the 1st part of the code from https://blog.francium.tech/build-your-own-image-classifier-with-tensorflow-and-keras-dc147a15e38e and the second from https://www.datacamp.com/community/tutorials/cnn-tensorflow-python. I adopted them to something I am working on where I have some images that belong to 2 different classes. For training, each image class are placed in the same training folder and for testing, each image class is placed in the same testing folder. I figure the error is referring to a mismatch between the logits and label. I have tried tweaking the shapes in the weights and biases as defined in the code below, but this didn't solve the issue. I also tried tampering with the batch size, still no solution. Does anyone have any idea what could cause this error? Could it be how I arranged my training and testing set?
ROOT_PATH = "/my/file/path/images"
train_data_directory = os.path.join(ROOT_PATH, "data/train")
test_data_directory = os.path.join(ROOT_PATH, "data/test")
train_data = train_data_directory
test_data = test_data_directory
def one_hot_label(img):
label = img.split('.')[0]
global ohl
ohl = []
if label == 'A':
ohl = np.array([1,0])
elif label == 'B':
ohl = np.array([0,1])
return ohl
def train_data_with_label():
train_images = []
for i in tqdm(os.listdir(train_data)):
path = os.path.join(train_data,i)
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (28,28))
train_images.append([np.array(img), one_hot_label(i)])
shuffle(train_images)
return train_images
def test_data_with_label():
test_images = []
for i in tqdm(os.listdir(test_data)):
path = os.path.join(test_data,i)
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (28,28))
test_images.append([np.array(img), one_hot_label(i)])
shuffle(test_images)
return test_images
training_images = train_data_with_label()
testing_images = test_data_with_label()
#both placeholders are of type float
x = tf.placeholder("float", [None, 28,28,1])
y = tf.placeholder("float", [None, n_classes])
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],padding='SAME')
weights = {
'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()),
'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()),
}
biases = {
'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('B4', shape=(2), initializer=tf.contrib.layers.xavier_initializer()),
}
def conv_net(x, weights, biases):
# here we call the conv2d function we had defined above and pass the input image x, weights wc1 and bias bc1.
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 14*14 matrix.
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
# here we call the conv2d function we had defined above and pass the input image x, weights wc2 and bias bc2.
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 7*7 matrix.
conv2 = maxpool2d(conv2, k=2)
conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
# Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 4*4.
conv3 = maxpool2d(conv3, k=2)
#print(conv3.shape)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Output, class prediction
# finally we multiply the fully connected layer with the weights and add a bias term.
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
print(out.shape)
return out
#print(out.shape)
pred = conv_net(x, weights, biases)
#pred.shape
#labelsa = tf.constant(1., shape=y.shape)
#logsa = tf.constant(1., shape=pred.shape)
#labels = labels + tf.zeros_like(logsa)
print(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
print(y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
summary_writer = tf.summary.FileWriter('./Output', sess.graph)
for i in range(training_iters):
#print('here')
for batch in range(len(train_X)//batch_size):
print('here')
#offset = (batch * batch_size) % (train_Y.shape[0] - batch_size)
batch_x = train_X[batch*batch_size:min((batch+1)*batch_size,len(train_X))]
batch_y = train_Y[batch*batch_size:min((batch+1)*batch_size,len(train_Y))]
# Run optimization op (backprop).
# Calculate batch loss and accuracy
print(batch_y.shape)
opt = sess.run(optimizer, feed_dict={x: batch_x,
y: batch_y})
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y})
print("Iter " + str(i) + ", Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
print("Optimization Finished!")
# Calculate accuracy for all 10000 mnist test images
test_acc,valid_loss = sess.run([accuracy,cost], feed_dict={x: test_X,y: test_Y})
train_loss.append(loss)
test_loss.append(valid_loss)
train_accuracy.append(acc)
test_accuracy.append(test_acc)
print("Testing Accuracy:","{:.5f}".format(test_acc))
summary_writer.close()

Related

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to Tensorflow and Machine Learning and trying out CNN using Tensorflow with my custom input data. But I am getting the error attached below.
The Data or Image Size is 28x28 with 15 Labels.
I am not getting the numpy reshape thing in this script or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random
def load_data(data_directory):
directories = [d for d in os.listdir(data_directory)
if os.path.isdir(os.path.join(data_directory, d))]
labels = []
images = []
for d in directories:
label_directory = os.path.join(data_directory, d)
file_names = [os.path.join(label_directory, f)
for f in os.listdir(label_directory)
if f.endswith(".jpg")]
for f in file_names:
images.append(skimage.data.imread(f))
labels.append(d)
print(str(d)+' Completed')
return images, labels
ROOT_PATH = "H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')
n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def neural_network_model(x):
weights = {'W_Conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_Conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_FC':tf.Variable(tf.random_normal([7*7*64, 1024])),
'Output':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'B_Conv1':tf.Variable(tf.random_normal([32])),
'B_Conv2':tf.Variable(tf.random_normal([64])),
'B_FC':tf.Variable(tf.random_normal([1024])),
'Output':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1,28,28,1])
conv1 = conv2d(x, weights['W_Conv1'])
conv1 = maxpool2d(conv1)
conv2 = conv2d(conv1, weights['W_Conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2, [-1, 7*7*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_FC'])+biases['B_FC'])
output = tf.matmul(fc, weights['Output'])+biases['Output']
return output
def next_batch(num, data, labels):
idx = np.arange(0 , len(data))
np.random.shuffle(idx)
idx = idx[:num]
data_shuffle = [data[ i] for i in idx]
labels_shuffle = [labels[ i] for i in idx]
return np.asarray(data_shuffle), np.asarray(labels_shuffle)
def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
with tf.Session() as sess:
# OLD:
#sess.run(tf.initialize_all_variables())
# NEW:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(training_examples/batch_size)):
epoch_x, epoch_y = next_batch(batch_size, images, labels)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x: images, y: labels}))
print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What is needed to be fixed and how do I fix the shape of numpy array?
If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so tensorflow takes the one with [?, 784] shape. You should get rid of one of them.
Also note that your training data has the rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with any of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).

Zero cost in tensorflow multi perceptron

I have built a simple neural network to classify data into only 2 classes
Data is something like this
34.62365962451697,78.0246928153624,0
60.18259938620976,86.30855209546826,1
There are no zero values in data so there's no source of such cost.Cost is zero with adagrad optimiser and nan with gradient descent optimiser
Here's the code
import numpy as ny
import tensorflow as tf
def load():
data = []
for line in open("ex2data1.txt"):
row = line.split(',')
x = ny.array(row, dtype='|S4')
data.append(x.astype(ny.float64))
return ny.array(data)
def multilayer_perceptron(x, weights, biases):
# Hidden layer with ReLU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Hidden layer with ReLU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Output layer with linear activation
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([2, 15])),
'h2': tf.Variable(tf.random_normal([15, 15])),
'out': tf.Variable(tf.random_normal([15, 1]))
}
biases = {
'b1': tf.Variable(tf.random_normal([15])),
'b2': tf.Variable(tf.random_normal([15])),
'out': tf.Variable(tf.random_normal([1]))
}
data = load()
Xdata = ny.array(data[:, 0:2])
Ydata = ny.array(data[:, 2])
Ydata = ny.array(Ydata.reshape([100, 1]))
# Step 2 - Create input and output placeholders for data
X = tf.placeholder("float", [None, 2], name="X")
Y = tf.placeholder("float", [None, 1], name="Y")
pred = multilayer_perceptron(X, weights, biases)
# Minimize error using cross entropy
with tf.name_scope("cost"):
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))
optimizer = tf.train.AdagradOptimizer(0.001).minimize(cost)
tf.summary.scalar("cost", cost)
init = tf.global_variables_initializer()
summary_op = tf.summary.merge_all()
with tf.Session() as sess:
sess.run(init)
print(Xdata)
print(Ydata)
# Step 12 train the model
for i in range(1000):
sess.run(optimizer, feed_dict={X: Xdata, Y: Ydata})
if (i % 100 == 0):
print(sess.run(cost, feed_dict={X: Xdata, Y: Ydata}))
With the way your labels are represented you should not use this loss function. I think this is relevant

Output of loss is None

I have to finetune VGG.There are five convolutional layers and then three fully connected layers. Output from the last fully connected layer is the input of the loss function. Following is my code:
class vgg16:
def __init__(self, imgs1,imgs2, weights=None, sess=None):
self.imgs1 = imgs1
self.imgs2 = imgs2
with tf.variable_scope("siamese") as scope:
self.o1 = self.convlayers(imgs1)
self.fc_layers()
self.loss()
if weights is not None and sess is not None:
self.load_weights(weights, sess)
scope.reuse_variables()
self.o2 = self.convlayers(imgs2)
self.fc_layers()
self.loss()
if weights is not None and sess is not None:
self.load_weights(weights, sess)
#create loss function
def convlayers(self,imgs):
....
# conv1_2
with tf.name_scope('conv1_2') as scope:
......
# pool1
..
)
.....
# pool5
self.pool5 = tf.nn.max_pool(self.conv5_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
def fc_layers(self):
# fc1
with tf.name_scope('fc1') as scope:
....
# fc2
with tf.name_scope('fc2') as scope:
...
# fc3
with tf.name_scope('fc3') as scope:
fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
trainable=True, name='biases')
self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
def load_weights(self, weight_file, sess):
weights = np.load(weight_file)
keys = sorted(weights.keys())
for i, k in enumerate(keys):
print i, k, np.shape(weights[k])
sess.run(self.parameters[i].assign(weights[k]))
def loss(self):
loss=tf.nn.l2_loss(self.fc3l)
self.train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
if __name__ == '__main__':
sess = tf.Session()
imgs1 = tf.placeholder(tf.float32, [None, 224, 224, 3])#jis size ka bhi imaeg hai usko 224x224 may kar diya or RGB chaeay hmay
imgs2 = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg = vgg16(imgs1,imgs2, 'vgg16_weights.npz', sess)
img1 = imread('laska.png', mode='RGB')
img1 = imresize(img1, (224, 224))
img2 = imread('laska2.jpg', mode='RGB')
img2 = imresize(img2,(224, 224))
prob = sess.run(vgg.train_step, feed_dict={vgg.imgs1: [img1],vgg.imgs2: [img2]})
print('loss is:')
print(prob)
The problem is that the output of prob is None. Kindly indicate what I am doing wrong.
PS: I am following siamese architecture. Input to both branches are different images here.
The op self.train_step does not return anything, it just calculates gradients and updates variables. See here.
What you need to do is to save reference to loss tensor in your vgg16 class like this:
self.loss=tf.nn.l2_loss(self.fc3l)
and then execute both train_step and loss operations in single sess.run:
_, loss_value = sess.run([vgg.train_step, vgg.loss], feed_dict=...)
print('loss is:')
print(loss_value)

Optimize input image with class prior

I'm trying to implement the first part of the google blog entry
Inceptionism: Going Deeper into Neural Networks in TensorFlow. So far I have found several resources that either explain it in natural language or focus on other parts or give code snippets for other frameworks. I understand the idea of optimizing a random input image with respect to a class prior and also the maths behind it given in the this paper, section 2, but I'm not able to implement it myself using TensorFlow.
From this SO question and the helpful comment by etarion, I now know that you can give a list of variables to the optimizer, while all other variables are untouched. However, when giving the optimizer a random image as a variable leads to
File "mnist_test.py", line 101, in main
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=[rnd_img])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 198, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 309, in apply_gradients
(converted_grads_and_vars,))
ValueError: No gradients provided for any variable: ((None,<tensorflow.python.ops.variables.Variable object at 0x7feac1870410>),)
For testing purpose I used a stripped down MNIST example. I tried to keep it as short as possible while still being readable and executable:
def main():
# parameters
learning_rate = 0.001
train_batches = 1000
batch_size = 128
display_step = 50
# net parameters
n_input = 784 #28x28
n_classes = 10
keep_prob = 0.75
weights = {
'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024])),
'out': tf.Variable(tf.truncated_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
}
# tf inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32)
# create net
net = create_net(x, weights, biases, keep_prob)
# define loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))
# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# evaluation
pred_correct = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_correct, tf.float32))
print "loading mnist data"
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in xrange(train_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, dropout: keep_prob})
if i % display_step == 0:
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, dropout: 1.0})
print "batch: %i, loss: %.5f, accuracy: %.5f" % (i, loss, acc)
acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout: 1.0})
print "test accuracy: %.5f" % (acc)
# ====== this is where the reconstruction begins =====
rnd_img = tf.Variable(tf.random_normal([1, n_input]))
one_hot = np.zeros(10)
one_hot[4] = 1;
# the next line causes the error
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=[rnd_img])
for i in xrange(1000):
session.run(optimizer2, feed_dict={x: rnd_img, y: one_hot, dropout: 1.0})
sess.close()
if __name__ == "__main__":
main()
The helper functions I used:
def create_net(x, weights, biases, dropout):
x = tf.reshape(x, shape=[-1, 28, 28, 1])
conv1 = conv2d_relu(x, weights['wc1'], biases['bc1'])
conv1 = maxpool2d(conv1, 2)
conv2 = conv2d_relu(conv1, weights['wc2'], biases['bc2'])
conv2 = maxpool2d(conv2, 2)
fc1 = fullyconnected_relu(conv2, weights['wd1'], biases['bd1'])
fc1 = tf.nn.dropout(fc1, dropout)
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
def conv2d_relu(x, W, b, stride=1):
conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')
conv = tf.nn.bias_add(conv, b)
return tf.nn.relu(conv)
def maxpool2d(x, k=2, stride=2, padding='VALID'):
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, stride, stride, 1], padding=padding)
def fullyconnected_relu(x, W, b):
fc = tf.reshape(x, [-1, W.get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc, W), b)
fc = tf.nn.relu(fc)
I've found some sources saying that this error occurs when there is no path within the computation graph between the output and the variables to be optimize, but I don't see why this should be the case here.
My questions are:
Why isn't the optimizer able to apply any gradients?
Is this the right way to go in order to implement the visualization of a class?
Thanks in advance.
Edit:
Here is the complete code again, after incorporation of the accepted answer (for anyone who is interested). Anyway, the results are still not as expected, as the script basically produces random images after 100000 rounds of reconstruction. Ideas are welcome.
import tensorflow as tf
import numpy as np
import skimage.io
def conv2d_relu(x, W, b, stride=1):
conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')
conv = tf.nn.bias_add(conv, b)
return tf.nn.relu(conv)
def maxpool2d(x, k=2, stride=2, padding='VALID'):
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, stride, stride, 1], padding=padding)
def fullyconnected_relu(x, W, b):
fc = tf.reshape(x, [-1, W.get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc, W), b)
fc = tf.nn.relu(fc)
return fc;
def create_net(x, weights, biases, dropout):
x = tf.reshape(x, shape=[-1, 28, 28, 1])
conv1 = conv2d_relu(x, weights['wc1'], biases['bc1'])
conv1 = maxpool2d(conv1, 2)
conv2 = conv2d_relu(conv1, weights['wc2'], biases['bc2'])
conv2 = maxpool2d(conv2, 2)
fc1 = fullyconnected_relu(conv2, weights['wd1'], biases['bd1'])
fc1 = tf.nn.dropout(fc1, dropout)
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
def save_image(img_data, name):
img = img_data.reshape(28,28)
mi = np.min(img)
ma = np.max(img)
img = (img-mi)/(ma-mi)
skimage.io.imsave(name, img)
def main():
# parameters
learning_rate = 0.001
train_batches = 1000
batch_size = 100
display_step = 50
# net parameters
n_input = 784 #28x28
n_classes = 10
keep_prob = 0.75
weights = {
'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024])),
'out': tf.Variable(tf.truncated_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
}
# tf inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32)
# create net
net = create_net(x, weights, biases, dropout)
# define loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))
# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# evaluation
pred_correct = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_correct, tf.float32))
print "loading mnist data"
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in xrange(train_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, dropout: keep_prob})
if i % display_step == 0:
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, dropout: 1.0})
print "batch: %i, loss: %.5f, accuracy: %.5f" % (i, loss, acc)
acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout: 1.0})
print "test accuracy: %.5f" % (acc)
# reconstruction part
rnd_img = tf.Variable(tf.random_normal([1, n_input]))
one_hot = np.zeros((1, 10))
one_hot[0,1] = 1;
net2 = create_net(rnd_img, weights, biases, dropout)
cost2 = -tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net2, y))
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(cost2, var_list=[rnd_img])
init_var_list = []
for var in tf.all_variables():
if(not tf.is_variable_initialized(var).eval(session=sess)):
init_var_list.append(var)
sess.run(tf.initialize_variables(init_var_list))
save_image(rnd_img.eval(sess), "bevor.tiff")
for i in xrange(100000):
_, loss = sess.run([optimizer2, cost2], feed_dict={y: one_hot, dropout: 1.0})
if(i%10000 == 0):
cur_img = rnd_img.eval(session=sess)
print "loss:", loss, "mi:", np.min(cur_img), "ma:", np.max(cur_img)
save_image(rnd_img.eval(sess), "after.tiff")
sess.close()
if __name__ == "__main__":
main()
Some explanation: After rebuilding the graph with the new input variable and optimizer, I had to initialize the new variables, i.e. the rnd_img and some helper variables used by the Adam optimizer, hence the loop over all_variables() and checking for initialization status. If somebody knows a more elegant way, let me know. Or maybe that's the reason why I don't get any results?
The rnd_img needs to part of the graph that you optimize. In your case, you just create a variable and tell the optimizer to optimize it, but the variable is not connected to the loss in the graph. What you can for example do is use another call to create_net with rnd_image instead of x (but using the same weights!), create the cost for that and then create a minimization op for that cost. Then for optimization you only feed in y.

Tensorflow training got stuck after some steps, how to investigate?

I have a python script to train a Tensorflow model similar to the one in CIFAR-10 tutorial. I have 20500 training examples and am using 128 examples per batch. I set 1,000,000 as the max number of steps. However after about 164,000 steps, the python script seems stuck somewhere. Is there any way to find out where the script is stuck? My last resort would be using Ctrl-C to terminate the process and force it to print out a backtrace. But I wonder if there are other things I should check before I kill the process.
Here's the train loop:
def train(trainingData, batchSize, workingDir, maxSteps):
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
image, label = readData(trainingData)
minAfterDequeue = 5000
capacity = minAfterDequeue + 3 * batchSize
imageBatch, labelBatch = tf.train.shuffle_batch([image, label], batch_size=batchSize, capacity=capacity, min_after_dequeue=minAfterDequeue)
#labelBatch = tf.reshape(labelBatch, [batchSize, 1])
#tf.image_summary('images', imageBatch)
#tf.histogram_summary('labels', tf.cast(labelBatch, tf.float32))
logits = network.inference(imageBatch, 0.5)
#floatLabel = tf.cast(labelBatch, tf.float32)
#cross_entropy_per_example = tf.nn.softmax_cross_entropy_with_logits(logits, floatLabel)
loss, cross_entropy = network.loss(logits, labelBatch)
train_op = network.train(loss, global_step, batchSize)
# Create a saver
saver = tf.train.Saver(tf.all_variables())
summary_op = tf.merge_all_summaries()
session = tf.Session()
init = tf.initialize_all_variables()
session.run(init)
tf.train.start_queue_runners(sess=session)
summary_writer = tf.train.SummaryWriter(workingDir, session.graph_def)
for step in xrange(maxSteps):
start_time = time.time()
#l, sm, ce = session.run([floatLabel, logits, cross_entropy_per_example])
#print l
#print sm
#print ce
_, loss_value = session.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
examples_per_sec = batchSize / duration
format_str = "%s: step %d, loss = %e (%.1f examples/sec; %.3f sec/batch"
print (format_str % (datetime.now(), step, loss_value, examples_per_sec, float(duration)))
if step % 100 == 0:
summary_str = session.run(summary_op)
summary_writer.add_summary(summary_str, step)
if step % 1000 == 0 or (step + 1) == maxSteps:
checkpoint_path = os.path.join(workingDir, 'model.ckpt')
saver.save(session, checkpoint_path, global_step = step)
And here's the various functions used to construct the graph:
import re
import tensorflow as tf
TOWER_NAME="tower"
NUM_EXAMPLES_PER_EPOCH = 50000
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.95 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.01 # Initial learning rate.
def _activation_summary(x):
"""Helper to create summaries for activations.
Creates a summary that provides a histogram of activations.
Creates a summary that measure the sparsity of activations.
Args:
x: Tensor
Returns:
nothing
"""
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
# session. This helps the clarity of presentation on tensorboard.
tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
tf.histogram_summary(tensor_name + '/activations', x)
tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
#numChannel = tf.shape(x)[3]
#tf.image_summary(tensor_name + '/image', tf.reshape(x)
def _variable_on_cpu(name, shape, initializer):
"""Helper to create a Variable stored on CPU memory.
Args:
name: name of the variable
shape: list of ints
initializer: initializer for Variable
Returns:
Variable Tensor
"""
with tf.device('/cpu:0'):
var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)
return var
def _variable_with_weight_decay(name, shape, stddev, wd=None):
"""Helper to create an initialized Variable with weight decay.
Note that the Variable is initialized with a truncated normal distribution.
A weight decay is added only if one is specified.
Args:
name: name of the variable
shape: list of ints
stddev: standard deviation of a truncated Gaussian
wd: add L2Loss weight decay multiplied by this float. If None, weight
decay is not added for this Variable.
Returns:
Variable Tensor
"""
var = _variable_on_cpu(name, shape, tf.truncated_normal_initializer(stddev=stddev))
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
return var
def inference(images, dropout):
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 1, 32], stddev=5e-2)
conv = tf.nn.conv2d(images, kernel, [1,1,1,1], padding='SAME')
biases = _variable_on_cpu('biases', [32], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
# pool1
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
# conv2
with tf.variable_scope('conv2') as scope:
kernel = _variable_with_weight_decay('weights', shape=[3, 3, 32, 64], stddev=5e-2)
conv = tf.nn.conv2d(pool1, kernel, [1,1,1,1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv2)
# pool2
pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# conv3
with tf.variable_scope('conv3') as scope:
kernel = _variable_with_weight_decay('weights', shape=[3, 3, 64, 64], stddev=5e-2)
conv = tf.nn.conv2d(pool2, kernel, [1,1,1,1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv, biases)
conv3 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv3)
# pool 3
pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool3')
# fully connected 4
with tf.variable_scope('full4') as scope:
batchSize = pool3.get_shape()[0].value
flattened = tf.reshape(pool3, [batchSize, -1])
dim = flattened.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 256], stddev=5e-2)
biases = _variable_on_cpu('biases', [256], tf.constant_initializer(0.1))
full4 = tf.nn.relu(tf.matmul(flattened, weights) + biases, name=scope.name)
full4_dropout = tf.nn.dropout(full4, dropout)
_activation_summary(full4)
#_activation_summary(full4_dropout)
# fully connected 5
with tf.variable_scope('full5') as scope:
weights = _variable_with_weight_decay('weights', [256, 128], stddev=5e-2)
biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
full5 = tf.nn.relu(tf.matmul(full4_dropout, weights) + biases, name=scope.name)
full5_dropout = tf.nn.dropout(full5, dropout)
_activation_summary(full5)
#_activation_summary(full5_dropout)
# softmax
with tf.variable_scope('softmax_linear') as scope:
weights = _variable_with_weight_decay('weights', [128, 2], stddev=1/128.0)
biases = _variable_on_cpu('biases', [2], tf.constant_initializer(0.0))
softmax_linear = tf.add(tf.matmul(full5_dropout, weights), biases, name=scope.name)
_activation_summary(softmax_linear)
return softmax_linear
def loss(logits, labels):
labels = tf.cast(labels, tf.float32)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss'), cross_entropy_mean
def _add_loss_summaries(total_loss):
"""Add summaries for losses in CIFAR-10 model.
Generates moving average for all losses and associated summaries for
visualizing the performance of the network.
Args:
total_loss: Total loss from loss().
Returns:
loss_averages_op: op for generating moving averages of losses.
"""
# Compute the moving average of all individual losses and the total loss.
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [total_loss])
# Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [total_loss]:
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(l.op.name +' (raw)', l)
tf.scalar_summary(l.op.name, loss_averages.average(l))
return loss_averages_op
def train(loss, step, batchSize):
numBatchesPerEpoch = NUM_EXAMPLES_PER_EPOCH / batchSize
decay_steps = int(numBatchesPerEpoch * NUM_EPOCHS_PER_DECAY)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
step,
decay_steps,
LEARNING_RATE_DECAY_FACTOR,
staircase=True)
tf.scalar_summary('learning_rate', lr)
loss_averages_op = _add_loss_summaries(loss)
# compute gradients
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.GradientDescentOptimizer(lr)
grads = opt.compute_gradients(loss)
# apply gradients
apply_gradient_op = opt.apply_gradients(grads, global_step = step)
# add histograms for trainable variables
for var in tf.trainable_variables():
tf.histogram_summary(var.op.name, var)
# add histograms for gradients:
for grad, var in grads:
if grad is not None:
tf.histogram_summary(var.op.name + '/gradients', grad)
# Track the moving average of all trainable variables
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, step)
variable_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variable_averages_op]):
train_op = tf.no_op(name='train')
return train_op