TensorFlow network too large for GPU memory

I have a rather large network, and my GPU is running out of memory. It isn't a bug in any of the code; the network itself is simply too large to fit into memory. I've even tried the GPU config recommendations here.
For example, I've tried both of the gpu_options below...
gpu_options = tf.GPUOptions()
config = tf.ConfigProto(gpu_options=gpu_options)
config.gpu_options.allow_growth = True
# config.optimizer_options.opt_level = 2
# config.graph_options.enable_recv_scheduling = True
# config.graph_options.build_cost_model = 1
config.gpu_options.per_process_gpu_memory_fraction = 0.1
But I'm still running out of memory. I have been informed by GitHub user @girving here that TensorFlow doesn't handle memory overflow (which makes no sense to me; why wouldn't they implement this?).
However, he also claimed there are workarounds. I can't find anyone who has had to implement one. Can anyone point me in the right direction? Can I implement queueing somehow?
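For example, would pinning the big variables to host memory with tf.device be a sane direction? A minimal sketch of what I mean (hypothetical on my part, not something @girving suggested):
import tensorflow as tf

# Hypothetical workaround sketch: store a large weight matrix in host RAM;
# TensorFlow then copies it to the GPU only for the ops that consume it,
# trading speed for GPU memory.
with tf.device('/cpu:0'):
    lstm_w_cpu = tf.Variable(tf.random_normal([128, 396 * 2]), name="lstm_w_cpu")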
For reference, here is some code... the program runs out of memory at the time of sess.run(init).
#Kendall Weihe
#This is a CNN that handles 3D data
#Adjust network parameters below, also adjust data directory
import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
from tensorflow.tensorflow.scroll import scroll_data
# Parameters
learning_rate = 0.001
training_iters = 1000000
batch_size = 1
display_step = 1
# Network Parameters
n_images = 100
n_input_x = 396 # Input image x-dimension
n_input_y = 396 # Input image y-dimension
n_input_z = 5
n_hidden = 128
n_classes = 2 # Binary classification -- on a surface or not
n_output = n_input_x * n_classes
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y])
y = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y, n_classes], name="ground_truth")
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
def input_data():
    data = np.empty((n_images, n_input_x, n_input_y))
    temp = []
    for i in range(n_images):
        filename = "/home/volcart/Documents/Data/input_crops/cropped00" + str(i) + ".tif"
        im = Image.open(filename)
        imarray = np.array(im)
        temp.append(imarray)
    for i in range(n_images):
        for j in range(n_input_x):
            for k in range(n_input_y):
                data[i][j][k] = temp[i][j][k]
    return data
# Create some wrappers for simplicity
def conv3d(x, W, b, strides=1):
    # Conv3D wrapper, with bias and relu activation
    x = tf.nn.conv3d(x, W, strides=[1, strides, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool3d(x, k=2):
    # MaxPool3D wrapper
    return tf.nn.max_pool3d(x, ksize=[1, k, k, k, 1], strides=[1, k, k, k, 1],
                            padding='SAME')

def deconv3d(prev_layer, w, b, output_shape, strides):
    # Deconv (transposed convolution) layer
    deconv = tf.nn.conv3d_transpose(prev_layer, w, output_shape=output_shape, strides=strides, padding="VALID")
    deconv = tf.nn.bias_add(deconv, b)
    deconv = tf.nn.relu(deconv)
    return deconv
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, n_input_z, n_input_x, n_input_y, 1])

    with tf.name_scope("conv1") as scope:
        # Convolution Layer
        conv1 = conv3d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        #conv1 = tf.nn.local_response_normalization(conv1)
        conv1 = maxpool3d(conv1, k=2)

    # Convolution Layer
    with tf.name_scope("conv2") as scope:
        conv2 = conv3d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        # conv2 = tf.nn.local_response_normalization(conv2)
        conv2 = maxpool3d(conv2, k=2)

    # Convolution Layer
    with tf.name_scope("conv3") as scope:
        conv3 = conv3d(conv2, weights['wc3'], biases['bc3'])
        # Max Pooling (down-sampling)
        # conv3 = tf.nn.local_response_normalization(conv3)
        conv3 = maxpool3d(conv3, k=2)

    # pdb.set_trace()
    temp_batch_size = tf.shape(x)[0]  # batch_size shape

    with tf.name_scope("deconv1") as scope:
        output_shape = [temp_batch_size, 2, n_input_x / 4, n_input_y / 4, 16]
        strides = [1, 2, 2, 2, 1]
        #conv4 = deconv3d(conv3, weights['wdc1'], biases['bdc1'], output_shape, strides)
        # conv4 = tf.nn.local_response_normalization(conv4)
        conv4 = tf.nn.conv3d_transpose(conv3, weights['wdc1'], output_shape=output_shape, strides=strides, padding="SAME")
        conv4 = tf.nn.bias_add(conv4, biases['bdc1'])
        conv4 = tf.nn.relu(conv4)

    with tf.name_scope("deconv2") as scope:
        output_shape = [temp_batch_size, 3, n_input_x / 2, n_input_y / 2, 8]
        strides = [1, 1, 2, 2, 1]
        conv5 = deconv3d(conv4, weights['wdc2'], biases['bdc2'], output_shape, strides)
        # conv5 = tf.nn.local_response_normalization(conv5)

    with tf.name_scope("deconv3") as scope:
        output_shape = [temp_batch_size, n_input_z, n_input_x, n_input_y, 1]
        # this time don't use ReLU -- since output layer
        conv6 = tf.nn.conv3d_transpose(conv5, weights['wdc3'], output_shape=output_shape, strides=[1, 1, 2, 2, 1], padding="VALID")
        conv6 = tf.nn.bias_add(conv6, biases['bdc3'])
        conv6 = tf.nn.dropout(conv6, dropout)
        # conv6 = tf.nn.relu(conv6)

    # pdb.set_trace()
    x = tf.reshape(conv6, [-1, n_input_x])
    x = tf.split(0, n_input_y * n_input_z, x)

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, activation=tf.nn.relu)
    # lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * n_hidden, state_is_tuple=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=0.75)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    output = []
    for i in xrange(n_input_y * n_input_z):
        output.append(tf.matmul(outputs[i], lstm_weights[i]) + lstm_biases[i])
    return output
weights = {
    # 2x2x2 conv, 1 input channel, 8 outputs
    'wc1': tf.Variable(tf.random_normal([2, 2, 2, 1, 8])),
    # 2x2x2 conv, 8 inputs, 16 outputs
    'wc2': tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),
    # 2x2x2 conv, 16 inputs, 32 outputs
    'wc3': tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),
    'wdc1': tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),
    'wdc2': tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),
    'wdc3': tf.Variable(tf.random_normal([3, 2, 2, 1, 8])),
}

biases = {
    'bc1': tf.Variable(tf.random_normal([8])),
    'bc2': tf.Variable(tf.random_normal([16])),
    'bc3': tf.Variable(tf.random_normal([32])),
    'bdc1': tf.Variable(tf.random_normal([16])),
    'bdc2': tf.Variable(tf.random_normal([8])),
    'bdc3': tf.Variable(tf.random_normal([1])),
}

lstm_weights = {}
lstm_biases = {}
for i in xrange(n_input_y * n_input_z):
    lstm_weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
    lstm_biases[i] = tf.Variable(tf.random_normal([n_output]))
# Construct model
with tf.name_scope("net") as scope:
    print "Building network..."
    pred = conv_net(x, weights, biases, keep_prob)
    print "Network built!"
    # pdb.set_trace()
    pred = tf.transpose(tf.pack(pred), [1, 0, 2])
    pred = tf.reshape(pred, [-1, n_input_z, n_input_x, n_input_y, n_classes])
    # Reshape for cost function
    temp_pred = tf.reshape(pred, [-1, n_classes])
    temp_y = tf.reshape(y, [-1, n_classes])

with tf.name_scope("loss") as scope:
    # cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
    cost = (tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))

with tf.name_scope("opt") as scope:
    print "Initializing optimizer..."
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    print "Optimizer initialized!"
    # pdb.set_trace()

# Evaluate model
with tf.name_scope("acc") as scope:
    # accuracy is the difference between prediction and ground truth matrices
    correct_pred = tf.equal(0, tf.cast(tf.sub(tf.nn.sigmoid(temp_pred), temp_y), tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.name_scope("prediction-node") as scope:
    prediction_node = tf.nn.sigmoid(temp_pred)

# Initializing the variables
with tf.name_scope("initialize-and-config") as scope:
    print "Initializing variables & configuring..."
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()
    gpu_options = tf.GPUOptions()
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allow_growth = True
    # config.optimizer_options.opt_level = 2
    # config.graph_options.enable_recv_scheduling = True
    # config.graph_options.build_cost_model = 1
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    print "Variables and configurations initialized!"

# Launch the graph
with tf.Session(config=config) as sess:
    print "Initializing session..."
    sess.run(init)
    print "Session initialized!"
    print "Restoring session..."
    saver.restore(sess, "/home/volcart/Documents/3D-CNN-2D-LSTM-reg-model/model.ckpt")
    print "Session restored!"
    tf.get_default_graph().finalize()

    # Import data
    print "Importing data..."
    data = input_data()
    print "Data imported!"

    # Keep training until reach max iterations
    for i in range(n_images):
        print "Prediction image number -- " + str(i)
        temp = []
        for j in range(n_input_z):
            temp.append(data[j, :, :])
        temp = np.asarray(temp)
        temp = temp.reshape((1, n_input_z, n_input_x, n_input_y))
        prediction = sess.run(prediction_node, feed_dict={x: temp, keep_prob: 1.0})
        prediction = prediction.reshape((n_input_x, n_input_y, n_classes))
        temp_arr1 = np.empty((n_input_x, n_input_y))
        # keep only the class-0 channel of the prediction
        for row in xrange(n_input_x):
            for col in xrange(n_input_y):
                temp_arr1[row][col] = prediction[row][col][0]
        csv_file = "/home/volcart/Documents/3D-CNN-2D-LSTM-pred/3D-CNN-2D-LSTM-step-" + str(i) + ".csv"
        np.savetxt(csv_file, temp_arr1, delimiter=",")

Related

TensorFlow error: logits and labels must be broadcastable

I get the following error while trying to run TensorFlow:
InvalidArgumentError: logits and labels must be broadcastable: logits_size=[30,2] labels_size=[8,2]
Below is my code. I obtained parts of the first part of the code from https://blog.francium.tech/build-your-own-image-classifier-with-tensorflow-and-keras-dc147a15e38e and the second part from https://www.datacamp.com/community/tutorials/cnn-tensorflow-python. I adapted them to something I am working on, where I have images that belong to 2 different classes. For training, the images of each class are placed in the same training folder, and for testing, the images of each class are placed in the same testing folder. I figure the error refers to a mismatch between the logits and labels. I have tried tweaking the shapes of the weights and biases as defined in the code below, but this didn't solve the issue. I also tried changing the batch size, but still no solution. Does anyone have any idea what could cause this error? Could it be how I arranged my training and testing sets?
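For what it's worth, here is how I checked the runtime shapes that actually reach the loss (a debugging snippet I added myself, using the pred, y, x names from the code below; it is not part of either tutorial):
# evaluated inside the training session, right before the failing step:
logits_shape, labels_shape = sess.run([tf.shape(pred), tf.shape(y)],
                                      feed_dict={x: batch_x, y: batch_y})
print(logits_shape, labels_shape)  # the first (batch) entries must match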
ROOT_PATH = "/my/file/path/images"
train_data_directory = os.path.join(ROOT_PATH, "data/train")
test_data_directory = os.path.join(ROOT_PATH, "data/test")
train_data = train_data_directory
test_data = test_data_directory

def one_hot_label(img):
    label = img.split('.')[0]
    global ohl
    ohl = []
    if label == 'A':
        ohl = np.array([1, 0])
    elif label == 'B':
        ohl = np.array([0, 1])
    return ohl

def train_data_with_label():
    train_images = []
    for i in tqdm(os.listdir(train_data)):
        path = os.path.join(train_data, i)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (28, 28))
        train_images.append([np.array(img), one_hot_label(i)])
    shuffle(train_images)
    return train_images

def test_data_with_label():
    test_images = []
    for i in tqdm(os.listdir(test_data)):
        path = os.path.join(test_data, i)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (28, 28))
        test_images.append([np.array(img), one_hot_label(i)])
    shuffle(test_images)
    return test_images

training_images = train_data_with_label()
testing_images = test_data_with_label()
training_images = train_data_with_label()
testing_images = test_data_with_label()
# both placeholders are of type float
x = tf.placeholder("float", [None, 28, 28, 1])
y = tf.placeholder("float", [None, n_classes])

def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

weights = {
    'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
    'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
    'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()),
    'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()),
}
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('B4', shape=(2), initializer=tf.contrib.layers.xavier_initializer()),
}

def conv_net(x, weights, biases):
    # here we call the conv2d function we defined above and pass the input image x, weights wc1 and bias bc1.
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 14*14 matrix.
    conv1 = maxpool2d(conv1, k=2)
    # Convolution Layer
    # here we call the conv2d function we defined above and pass conv1, weights wc2 and bias bc2.
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 7*7 matrix.
    conv2 = maxpool2d(conv2, k=2)
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # Max Pooling (down-sampling), this chooses the max value from a 2*2 matrix window and outputs a 4*4.
    conv3 = maxpool2d(conv3, k=2)
    #print(conv3.shape)
    # Fully connected layer
    # Reshape conv3 output to fit fully connected layer input
    fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Output, class prediction
    # finally we multiply the fully connected layer with the weights and add a bias term.
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    print(out.shape)
    return out

#print(out.shape)
pred = conv_net(x, weights, biases)
#pred.shape
#labelsa = tf.constant(1., shape=y.shape)
#logsa = tf.constant(1., shape=pred.shape)
#labels = labels + tf.zeros_like(logsa)
print(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
print(y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
    sess.run(init)
    train_loss = []
    test_loss = []
    train_accuracy = []
    test_accuracy = []
    summary_writer = tf.summary.FileWriter('./Output', sess.graph)
    for i in range(training_iters):
        #print('here')
        for batch in range(len(train_X)//batch_size):
            print('here')
            #offset = (batch * batch_size) % (train_Y.shape[0] - batch_size)
            batch_x = train_X[batch*batch_size:min((batch+1)*batch_size, len(train_X))]
            batch_y = train_Y[batch*batch_size:min((batch+1)*batch_size, len(train_Y))]
            # Run optimization op (backprop).
            # Calculate batch loss and accuracy
            print(batch_y.shape)
            opt = sess.run(optimizer, feed_dict={x: batch_x,
                                                 y: batch_y})
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y})
        print("Iter " + str(i) + ", Loss= " + \
              "{:.6f}".format(loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy for all 10000 mnist test images
        test_acc, valid_loss = sess.run([accuracy, cost], feed_dict={x: test_X, y: test_Y})
        train_loss.append(loss)
        test_loss.append(valid_loss)
        train_accuracy.append(acc)
        test_accuracy.append(test_acc)
        print("Testing Accuracy:", "{:.5f}".format(test_acc))
    summary_writer.close()

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to TensorFlow and machine learning, and I am trying out a CNN in TensorFlow with my custom input data, but I am getting the error attached below.
The data/image size is 28x28, with 15 labels.
I don't understand the numpy reshaping in this script, or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random

def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory)
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f)
                      for f in os.listdir(label_directory)
                      if f.endswith(".jpg")]
        for f in file_names:
            images.append(skimage.data.imread(f))
            labels.append(d)
        print(str(d)+' Completed')
    return images, labels

ROOT_PATH = "H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")

print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')

n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128

x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

def neural_network_model(x):
    weights = {'W_Conv1': tf.Variable(tf.random_normal([5,5,1,32])),
               'W_Conv2': tf.Variable(tf.random_normal([5,5,32,64])),
               'W_FC': tf.Variable(tf.random_normal([7*7*64, 1024])),
               'Output': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'B_Conv1': tf.Variable(tf.random_normal([32])),
              'B_Conv2': tf.Variable(tf.random_normal([64])),
              'B_FC': tf.Variable(tf.random_normal([1024])),
              'Output': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1,28,28,1])
    conv1 = conv2d(x, weights['W_Conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = conv2d(conv1, weights['W_Conv2'])
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7*7*64])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_FC']) + biases['B_FC'])
    output = tf.matmul(fc, weights['Output']) + biases['Output']
    return output

def next_batch(num, data, labels):
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        # OLD:
        #sess.run(tf.initialize_all_variables())
        # NEW:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(training_examples/batch_size)):
                epoch_x, epoch_y = next_batch(batch_size, images, labels)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: images, y: labels}))

print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What needs to be fixed, and how do I fix the shape of the numpy array?
If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so TensorFlow takes the one with shape [?, 784]. You should get rid of one of them.
Also note that your training data has rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with either of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).
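For instance, a minimal sketch of both options (the variable names are just illustrative, assuming epoch_x is a rank-3 batch from next_batch):
import numpy as np

batch = np.asarray(epoch_x)            # shape (batch_size, 28, 28)
flat = batch.reshape([-1, 784])        # feedable to x = tf.placeholder('float', [None, 784])
nhwc = batch.reshape([-1, 28, 28, 1])  # feedable to a [None, 28, 28, 1] placeholder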

Output of loss is None

I have to fine-tune VGG. There are five convolutional layers and then three fully connected layers. The output of the last fully connected layer is the input of the loss function. Following is my code:
class vgg16:
    def __init__(self, imgs1, imgs2, weights=None, sess=None):
        self.imgs1 = imgs1
        self.imgs2 = imgs2
        with tf.variable_scope("siamese") as scope:
            self.o1 = self.convlayers(imgs1)
            self.fc_layers()
            self.loss()
            if weights is not None and sess is not None:
                self.load_weights(weights, sess)
            scope.reuse_variables()
            self.o2 = self.convlayers(imgs2)
            self.fc_layers()
            self.loss()
            if weights is not None and sess is not None:
                self.load_weights(weights, sess)
        # create loss function

    def convlayers(self, imgs):
        ....
        # conv1_2
        with tf.name_scope('conv1_2') as scope:
            ......
        # pool1
        ..
        )
        .....
        # pool5
        self.pool5 = tf.nn.max_pool(self.conv5_3,
                                    ksize=[1, 2, 2, 1],
                                    strides=[1, 2, 2, 1],
                                    padding='SAME',
                                    name='pool4')

    def fc_layers(self):
        # fc1
        with tf.name_scope('fc1') as scope:
            ....
        # fc2
        with tf.name_scope('fc2') as scope:
            ...
        # fc3
        with tf.name_scope('fc3') as scope:
            fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
                                                   dtype=tf.float32,
                                                   stddev=1e-1), name='weights')
            fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
                               trainable=True, name='biases')
            self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)

    def load_weights(self, weight_file, sess):
        weights = np.load(weight_file)
        keys = sorted(weights.keys())
        for i, k in enumerate(keys):
            print i, k, np.shape(weights[k])
            sess.run(self.parameters[i].assign(weights[k]))

    def loss(self):
        loss = tf.nn.l2_loss(self.fc3l)
        self.train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

if __name__ == '__main__':
    sess = tf.Session()
    imgs1 = tf.placeholder(tf.float32, [None, 224, 224, 3])  # whatever size the image is, resize it to 224x224; we need RGB
    imgs2 = tf.placeholder(tf.float32, [None, 224, 224, 3])
    vgg = vgg16(imgs1, imgs2, 'vgg16_weights.npz', sess)
    img1 = imread('laska.png', mode='RGB')
    img1 = imresize(img1, (224, 224))
    img2 = imread('laska2.jpg', mode='RGB')
    img2 = imresize(img2, (224, 224))
    prob = sess.run(vgg.train_step, feed_dict={vgg.imgs1: [img1], vgg.imgs2: [img2]})
    print('loss is:')
    print(prob)
The problem is that the output of prob is None. Kindly indicate what I am doing wrong.
PS: I am following a siamese architecture. The inputs to the two branches are different images here.
The op self.train_step does not return anything; it just calculates gradients and updates variables. See here.
What you need to do is save a reference to the loss tensor in your vgg16 class, like this:
self.loss = tf.nn.l2_loss(self.fc3l)
and then execute both the train_step and loss operations in a single sess.run:
_, loss_value = sess.run([vgg.train_step, vgg.loss], feed_dict=...)
print('loss is:')
print(loss_value)
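One caveat (an assumption about the rest of the class, which isn't fully shown): binding the tensor to self.loss shadows the loss() method that __init__ calls twice for the two siamese branches, so the second call would fail. A safer sketch uses a distinct attribute name:
def loss(self):
    # store the tensor under a name that doesn't collide with this method
    self.loss_op = tf.nn.l2_loss(self.fc3l)
    self.train_step = tf.train.GradientDescentOptimizer(0.5).minimize(self.loss_op)
and correspondingly _, loss_value = sess.run([vgg.train_step, vgg.loss_op], feed_dict=...).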

How can this TensorFlow model converge on a CPU but not on a GPU?

We ran into the strange problem that our relatively simple model converges on the CPU, but not on a server with a GPU. No modifications to the code were made between the two runs, nor does the code contain any explicit conditional statements that change the workflow on different architectures.
In the likely event that the code is too long for you to read, we are still thankful for general speculations and hints.
#!/usr/bin/python
from __future__ import print_function
import tensorflow as tf
import os
import numpy as np
import input_data # copy from tensorflow/examples/tutorials/mnist/input_data.py
# wget https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/tutorials/mnist/input_data.py if needed
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
force_gpu = False
debug = True # histogram_summary ...
# _cpu='/cpu:0'
default_learning_rate=0.001
tensorboard_logs = '/tmp/tensorboard-logs/'
# $(sleep 5; open http://0.0.0.0:6006) & tensorboard --debug --logdir=/tmp/tensorboard-logs/
class net():
    def __init__(self, model, data, name=0, learning_rate=default_learning_rate, batch_size=64):
        self.session = sess = session = tf.Session()
        self.model = model
        self.data = data  # assigned to self.x=net.input via train
        self.batch_size = batch_size
        self.layers = []
        self.last_width = self.input_width(data)
        self.learning_rate = learning_rate
        self.generate_model(model)

    def generate_model(self, model, name=''):
        if not model: return self
        with tf.name_scope('state'):
            self.keep_prob = tf.placeholder(tf.float32)  # 1 for testing! else 1 - dropout
            self.train_phase = tf.placeholder(tf.bool, name='train_phase')
            self.global_step = tf.Variable(0)  # don't set, feed or increment global_step, tensorflow will do it automatically
        with tf.name_scope('data'):
            n_input = 28*28
            n_classes = 10
            self.x = x = self.input = tf.placeholder(tf.float32, [None, n_input])
            self.last_layer = x
            self.y = y = self.target = tf.placeholder(tf.float32, [None, n_classes])
            if not force_gpu: tf.image_summary("mnist", tf.reshape(self.x, [-1, 28, 28, 1], "mnist_images"))
        with tf.name_scope('model'):
            model(self)
            if(self.last_width != n_classes): self.classifier()  # 10 classes auto

    def input_width(self, data):
        return 28*28

    def add(self, layer):
        self.layers.append(layer)
        self.last_layer = layer
        self.last_shape = layer.get_shape()

    def reshape(self, shape):
        self.last_layer = tf.reshape(self.last_layer, shape)
        self.last_shape = shape
        self.last_width = shape[-1]

    def batchnorm(self):
        from tensorflow.contrib.layers.python.layers import batch_norm as batch_norm
        with tf.name_scope('batchnorm') as scope:
            input = self.last_layer
            train_op = batch_norm(input, is_training=True, center=False, updates_collections=None, scope=scope)
            test_op = batch_norm(input, is_training=False, updates_collections=None, center=False, scope=scope, reuse=True)
            self.add(tf.cond(self.train_phase, lambda: train_op, lambda: test_op))
    # Fully connected layer
    def dense(self, hidden=1024, depth=1, act=tf.nn.tanh, dropout=False, parent=-1):
        if parent == -1: parent = self.last_layer
        shape = self.last_layer.get_shape()
        if shape and len(shape) > 2:
            self.last_width = int(shape[1]*shape[2]*shape[3])
            print("reshaping ", shape, "to", self.last_width)
            parent = tf.reshape(parent, [-1, self.last_width])
        width = hidden
        while depth > 0:
            with tf.name_scope('Dense_{:d}'.format(hidden)) as scope:
                print("Dense ", self.last_width, width)
                nr = len(self.layers)
                # if self.last_width == width:
                #     M = closest_unitary(np.random.rand(self.last_width, width) / (self.last_width + width))
                #     weights = tf.Variable(m, name="weights_dense_" + str(nr))
                # else:
                weights = tf.Variable(tf.random_uniform([self.last_width, width], minval=-1. / width, maxval=1. / width), name="weights_dense")
                bias = tf.Variable(tf.random_uniform([width], minval=-1. / width, maxval=1. / width), name="bias_dense")
                dense1 = tf.matmul(parent, weights, name='dense_' + str(nr)) + bias
                tf.histogram_summary('dense_' + str(nr), dense1)
                tf.histogram_summary('weights_' + str(nr), weights)
                tf.histogram_summary('bias_' + str(nr), bias)
                tf.histogram_summary('dense_' + str(nr) + '/sparsity', tf.nn.zero_fraction(dense1))
                tf.histogram_summary('weights_' + str(nr) + '/sparsity', tf.nn.zero_fraction(weights))
                if act: dense1 = act(dense1)
                # if norm: dense1 = self.norm(dense1, lsize=1)  # SHAPE!
                if dropout: dense1 = tf.nn.dropout(dense1, self.keep_prob)
                self.layers.append(dense1)
                self.last_layer = parent = dense1
                self.last_width = width
            depth = depth - 1
        self.last_shape = [-1, width]  # dense
    # Convolution Layer
    def conv(self, shape, act=tf.nn.relu, pool=True, dropout=False, norm=True, name=None):  # True why dropout bad in tensorflow??
        with tf.name_scope('conv'):
            print("input shape ", self.last_shape)
            print("conv shape ", shape)
            width = shape[-1]
            filters = tf.Variable(tf.random_normal(shape))
            # filters = tf.Variable(tf.random_uniform(shape, minval=-1. / width, maxval=1. / width), name="filters")
            _bias = tf.Variable(tf.random_normal([shape[-1]]))
            # conv1 = conv2d('conv', _X, _weights, _bias)
            conv1 = tf.nn.bias_add(tf.nn.conv2d(self.last_layer, filter=filters, strides=[1, 1, 1, 1], padding='SAME'), _bias)
            if debug: tf.histogram_summary('conv_' + str(len(self.layers)), conv1)
            if act: conv1 = act(conv1)
            if pool: conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
            if norm: conv1 = tf.nn.lrn(conv1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
            if debug: tf.histogram_summary('norm_' + str(len(self.layers)), conv1)
            if dropout: conv1 = tf.nn.dropout(conv1, self.keep_prob)
            print("output shape ", conv1.get_shape())
            self.add(conv1)
    def classifier(self, classes=10):  # Define loss and optimizer
        with tf.name_scope('prediction'):  # prediction
            if self.last_width != classes:
                # print("Automatically adding dense prediction")
                self.dense(hidden=classes, act=False, dropout=False)
        # cross_entropy = -tf.reduce_sum(y_*y)
        with tf.name_scope('classifier'):
            y_ = self.target
            manual = False  # True
            if classes > 100:
                print("using sampled_softmax_loss")
                y = prediction = self.last_layer
                self.cost = tf.reduce_mean(tf.nn.sampled_softmax_loss(y, y_))  # for big vocab
            elif manual:
                # prediction = y = self.last_layer = tf.nn.softmax(self.last_layer)
                # self.cost = cross_entropy = -tf.reduce_sum(y_ * tf.log(y + 1e-10))  # against NaN!
                prediction = y = tf.nn.log_softmax(self.last_layer)
                self.cost = cross_entropy = -tf.reduce_sum(y_ * y)
            else:
                y = prediction = self.last_layer
                self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))  # prediction, target
            # if not gpu:
            tf.scalar_summary('cost', self.cost)
            # self.cost = tf.Print(self.cost, [self.cost], "debug cost : ")
            learning_scheme = self.learning_rate
            # learning_scheme = tf.train.exponential_decay(self.learning_rate, self.global_step, decay_steps, decay_size)
            self.optimizer = tf.train.AdamOptimizer(learning_scheme).minimize(self.cost)
            # Evaluate model
            correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(self.target, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            if not force_gpu: tf.scalar_summary('accuracy', self.accuracy)
    # Launch the graph
    def next_batch(self, batch_size=10):
        return self.data.train.next_batch(batch_size)

    def train(self, steps=-1, dropout=None, display_step=10, test_step=200):  # epochs=-1,
        steps = 9999999 if steps == -1 else steps
        session = self.session
        # with tf.device(_cpu):
        # import tensorflow.contrib.layers as layers
        # t = tf.verify_tensor_all_finite(t, msg)
        tf.add_check_numerics_ops()
        self.summaries = tf.merge_all_summaries()
        self.summary_writer = tf.train.SummaryWriter(tensorboard_logs, session.graph)
        if not dropout: dropout = 1.  # keep all
        x = self.x
        y = self.y
        keep_prob = self.keep_prob
        session.run([tf.initialize_all_variables()])
        step = 0  # show first
        while step < steps:
            # print("step %d \r" % step)  # end=' ')
            batch_xs, batch_ys = self.next_batch(self.batch_size)
            # tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)
            # Fit training using batch data
            feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
            loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
            if step % test_step == 0: self.test(step)
            if step % display_step == 0:
                # Calculate batch accuracy, loss
                feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
                acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
                # self.summary_writer.add_summary(summary, step)  # only test summaries for smoother curve
                print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f}".format(step, loss, acc), end=' ')
                if str(loss) == "nan": return print("\nLoss gradient explosion, exiting!!!")  # restore!
            step += 1
        print("\nOptimization Finished!")
        self.test(step, number=10000)  # final test

    def inputs(self, data):
        self.inputs, self.labels = load_data()  # ...

    def test(self, step, number=400):  # 256
        session = sess = self.session
        run_metadata = tf.RunMetadata()
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        # Calculate accuracy for 256 mnist test images
        test_labels = self.data.test.labels[:number]
        test_images = self.data.test.images[:number]
        feed_dict = {self.x: test_images, self.y: test_labels, self.keep_prob: 1., self.train_phase: False}
        accuracy, summary = self.session.run([self.accuracy, self.summaries], feed_dict=feed_dict)
        # accuracy, summary = session.run([self.accuracy, self.summaries], feed_dict, run_options, run_metadata)
        print('\t'*3 + "Test Accuracy:", accuracy)
        # self.summary_writer.add_run_metadata(run_metadata, 'step #%03d' % step)
        self.summary_writer.add_summary(summary, global_step=step)
def dense(net):  # best with lr ~0.001
    # type: (layer.net) -> None
    # net.batchnorm()  # start lower, else no effect
    # net.dense(400, act=None)  # ~95% we can do better:
    net.dense(400, act=tf.nn.tanh)  # 0.996 YAY only 0.985 on full set, Step 5000 flat
    return  # 0.957% without any model!!

def alex(net):
    # type: (layer.net) -> None
    print("Building Alex-net")
    net.reshape(shape=[-1, 28, 28, 1])  # Reshape input pictures
    # net.batchnorm()
    net.conv([3, 3, 1, 64])
    net.conv([3, 3, 64, 128])
    net.conv([3, 3, 128, 256])
    net.dense(1024, act=tf.nn.relu)
    net.dense(1024, act=tf.nn.relu)

# net = layer.net(dense, data=mnist, learning_rate=0.01)  # ,'mnist' baseline
_net = net(alex, data=mnist, learning_rate=0.001)  # ,'mnist'
_net.train(50000, dropout=0.6, display_step=1, test_step=10)
In general, floating point computations can be a little nondeterministic when it comes to adding many numbers (and some GPUs are buggy). Did you try retuning hyperparameters (varying learning rates and whatnot) to account for this?
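If you want to rule out initialization differences first, a minimal sketch (TF 1.x API, matching the code above) that pins the random seeds so both machines at least start from identical weights:
import numpy as np
import tensorflow as tf

# fix all seed sources before the graph is built; any remaining divergence
# then comes from the device's floating-point reduction order, not from
# different initial weights
np.random.seed(0)
tf.set_random_seed(0)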
I faced the same problem in the past. I solved it by following the tutorial at this link: https://www.tensorflow.org/install/pip#windows-native
Apparently, installing TensorFlow via conda is not recommended.
As an extra tip, if you're working with a GPU, avoid TensorFlow 2.3; apparently it has some installation issues.

TensorFlow training gets stuck after some steps; how to investigate?

I have a Python script to train a TensorFlow model, similar to the one in the CIFAR-10 tutorial. I have 20,500 training examples and am using 128 examples per batch. I set 1,000,000 as the max number of steps; however, after about 164,000 steps the script seems to get stuck somewhere. Is there any way to find out where? My last resort would be using Ctrl-C to terminate the process and force it to print out a backtrace, but I wonder if there are other things I should check before I kill the process.
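One thing I am considering (an idea of my own, not in the script below): registering a traceback dump on a signal with the standard faulthandler module, so I can ask the live process where it is without killing it. The signal choice is arbitrary:
import faulthandler
import signal

# after this runs, `kill -USR1 <pid>` prints every thread's current Python
# stack to stderr without interrupting the process
faulthandler.register(signal.SIGUSR1)
(faulthandler is in the standard library from Python 3.3; for the Python 2 used here, it is available as a pip backport.)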
Here's the train loop:
def train(trainingData, batchSize, workingDir, maxSteps):
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        image, label = readData(trainingData)
        minAfterDequeue = 5000
        capacity = minAfterDequeue + 3 * batchSize
        imageBatch, labelBatch = tf.train.shuffle_batch([image, label], batch_size=batchSize, capacity=capacity, min_after_dequeue=minAfterDequeue)
        #labelBatch = tf.reshape(labelBatch, [batchSize, 1])
        #tf.image_summary('images', imageBatch)
        #tf.histogram_summary('labels', tf.cast(labelBatch, tf.float32))
        logits = network.inference(imageBatch, 0.5)
        #floatLabel = tf.cast(labelBatch, tf.float32)
        #cross_entropy_per_example = tf.nn.softmax_cross_entropy_with_logits(logits, floatLabel)
        loss, cross_entropy = network.loss(logits, labelBatch)
        train_op = network.train(loss, global_step, batchSize)
        # Create a saver
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        session = tf.Session()
        init = tf.initialize_all_variables()
        session.run(init)
        tf.train.start_queue_runners(sess=session)
        summary_writer = tf.train.SummaryWriter(workingDir, session.graph_def)
        for step in xrange(maxSteps):
            start_time = time.time()
            #l, sm, ce = session.run([floatLabel, logits, cross_entropy_per_example])
            #print l
            #print sm
            #print ce
            _, loss_value = session.run([train_op, loss])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            if step % 10 == 0:
                examples_per_sec = batchSize / duration
                format_str = "%s: step %d, loss = %e (%.1f examples/sec; %.3f sec/batch)"
                print (format_str % (datetime.now(), step, loss_value, examples_per_sec, float(duration)))
            if step % 100 == 0:
                summary_str = session.run(summary_op)
                summary_writer.add_summary(summary_str, step)
            if step % 1000 == 0 or (step + 1) == maxSteps:
                checkpoint_path = os.path.join(workingDir, 'model.ckpt')
                saver.save(session, checkpoint_path, global_step=step)
And here are the various functions used to construct the graph:
import re
import tensorflow as tf

TOWER_NAME = "tower"
NUM_EXAMPLES_PER_EPOCH = 50000

# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999      # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0       # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.95  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.01       # Initial learning rate.

def _activation_summary(x):
    """Helper to create summaries for activations.

    Creates a summary that provides a histogram of activations.
    Creates a summary that measures the sparsity of activations.

    Args:
        x: Tensor
    Returns:
        nothing
    """
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
    # session. This helps the clarity of presentation on tensorboard.
    tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    tf.histogram_summary(tensor_name + '/activations', x)
    tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
    #numChannel = tf.shape(x)[3]
    #tf.image_summary(tensor_name + '/image', tf.reshape(x)
def _variable_on_cpu(name, shape, initializer):
    """Helper to create a Variable stored on CPU memory.

    Args:
        name: name of the variable
        shape: list of ints
        initializer: initializer for Variable
    Returns:
        Variable Tensor
    """
    with tf.device('/cpu:0'):
        var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)
    return var

def _variable_with_weight_decay(name, shape, stddev, wd=None):
    """Helper to create an initialized Variable with weight decay.

    Note that the Variable is initialized with a truncated normal distribution.
    A weight decay is added only if one is specified.

    Args:
        name: name of the variable
        shape: list of ints
        stddev: standard deviation of a truncated Gaussian
        wd: add L2Loss weight decay multiplied by this float. If None, weight
            decay is not added for this Variable.
    Returns:
        Variable Tensor
    """
    var = _variable_on_cpu(name, shape, tf.truncated_normal_initializer(stddev=stddev))
    if wd is not None:
        weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var
def inference(images, dropout):
    # conv1
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[5, 5, 1, 32], stddev=5e-2)
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [32], tf.constant_initializer(0.1))
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv1)
    # pool1
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[3, 3, 32, 64], stddev=5e-2)
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv2)
    # pool2
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    # conv3
    with tf.variable_scope('conv3') as scope:
        kernel = _variable_with_weight_decay('weights', shape=[3, 3, 64, 64], stddev=5e-2)
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv3)
    # pool 3
    pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool3')
    # fully connected 4
    with tf.variable_scope('full4') as scope:
        batchSize = pool3.get_shape()[0].value
        flattened = tf.reshape(pool3, [batchSize, -1])
        dim = flattened.get_shape()[1].value
        weights = _variable_with_weight_decay('weights', shape=[dim, 256], stddev=5e-2)
        biases = _variable_on_cpu('biases', [256], tf.constant_initializer(0.1))
        full4 = tf.nn.relu(tf.matmul(flattened, weights) + biases, name=scope.name)
        full4_dropout = tf.nn.dropout(full4, dropout)
        _activation_summary(full4)
        #_activation_summary(full4_dropout)
    # fully connected 5
    with tf.variable_scope('full5') as scope:
        weights = _variable_with_weight_decay('weights', [256, 128], stddev=5e-2)
        biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
        full5 = tf.nn.relu(tf.matmul(full4_dropout, weights) + biases, name=scope.name)
        full5_dropout = tf.nn.dropout(full5, dropout)
        _activation_summary(full5)
        #_activation_summary(full5_dropout)
    # softmax
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [128, 2], stddev=1/128.0)
        biases = _variable_on_cpu('biases', [2], tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(full5_dropout, weights), biases, name=scope.name)
        _activation_summary(softmax_linear)
    return softmax_linear
def loss(logits, labels):
    labels = tf.cast(labels, tf.float32)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    return tf.add_n(tf.get_collection('losses'), name='total_loss'), cross_entropy_mean

def _add_loss_summaries(total_loss):
    """Add summaries for losses in CIFAR-10 model.

    Generates moving average for all losses and associated summaries for
    visualizing the performance of the network.

    Args:
        total_loss: Total loss from loss().
    Returns:
        loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.scalar_summary(l.op.name + ' (raw)', l)
        tf.scalar_summary(l.op.name, loss_averages.average(l))
    return loss_averages_op
def train(loss, step, batchSize):
    numBatchesPerEpoch = NUM_EXAMPLES_PER_EPOCH / batchSize
    decay_steps = int(numBatchesPerEpoch * NUM_EPOCHS_PER_DECAY)
    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)
    loss_averages_op = _add_loss_summaries(loss)
    # compute gradients
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(loss)
    # apply gradients
    apply_gradient_op = opt.apply_gradients(grads, global_step=step)
    # add histograms for trainable variables
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)
    # add histograms for gradients:
    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
    # Track the moving average of all trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variable_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op