How does the reuse option in tf.variable_scope work? - tensorflow

I have the following problem: I am writing a simple script to learn how TensorFlow works, and I am defining the variables for the convolutions with the help of tf.variable_scope. However, every time I try to run this script I get a ValueError telling me to set either reuse=None or reuse=True.
Can somebody explain why it doesn't just run the function without this option being set, or what a solution would be?
My code is:
import re
import tensorflow as tf
import numpy as np
# Load the input array and the label array from disk.
data = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/joanna/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
# NOTE(review): the labels loaded above are discarded here and replaced by a
# synthetic vector of 16400 entries in which indices 10001..16399 are set to 1
# and everything else is 0 -- confirm this override is intended.
labels = np.zeros((16400,))
labels[10001:16400]=1
labels = labels.astype(int)
data = data.astype(np.float32)
#labels = tf.cast(labels,tf.int64)
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1 # Initial learning rate.
# Number of output classes used by the softmax layer in train().
NUM_CLASSES=2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN= 1000
# Batch size used for the placeholders in train().
batch_size=300
def _variable_on_cpu(name, shape, initializer):
    """Create or fetch a float32 variable through tf.get_variable.

    Args:
      name: name of the variable.
      shape: shape handed to tf.get_variable.
      initializer: initializer handed to tf.get_variable.
    Returns:
      Whatever tf.get_variable returns for the given arguments.
    """
    # NOTE: despite the function name, no explicit device placement is done here.
    return tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)
def _add_loss_summaries(loss):
    """Attach scalar summaries and a moving average to every loss.

    The tensors in the 'losses' collection plus `loss` itself are tracked by
    an exponential moving average (decay 0.9, name 'avg'). For each of them a
    raw summary and a summary of the averaged value are registered.

    Args:
      loss: the total loss tensor.
    Returns:
      loss_averages_op: op that updates the moving averages.
    """
    averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked = tf.get_collection('losses') + [loss]
    loss_averages_op = averager.apply(tracked)
    for tensor in tracked:
        op_name = tensor.op.name
        # The raw value gets the ' (raw)' suffix; the averaged value keeps the
        # original op name.
        tf.scalar_summary(op_name +' (raw)', tensor)
        tf.scalar_summary(op_name, averager.average(tensor))
    return loss_averages_op
def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a truncated-normal initialized variable with optional L2 decay.

    Args:
      name: name of the variable.
      shape: shape of the variable.
      stddev: standard deviation of the truncated normal initializer.
      wd: multiplier for the L2 loss of the variable; when None no decay term
        is added to the 'losses' collection.
    Returns:
      The variable returned by _variable_on_cpu.
    """
    initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
    var = _variable_on_cpu(name, shape, initializer)
    if wd is None:
        return var
    penalty = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', penalty)
    return var
def _activation_summary(x):
    """Register a histogram summary and a sparsity summary for tensor `x`.

    The summary tag is the op name of `x` with any '_<digits>/' fragment
    removed.
    """
    tag = re.sub('_[0-9]*/','', x.op.name)
    sparsity = tf.nn.zero_fraction(x)
    tf.histogram_summary(tag + '/activations', x)
    tf.scalar_summary(tag + '/sparsity', sparsity)
def iterate_batches(data, labels, batch_size, num_epochs):
    """Yield consecutive (data, labels) mini-batches for `num_epochs` passes.

    Args:
      data: 4-D array indexed along its first axis.
      labels: array whose first dimension gives the number of examples.
      batch_size: number of examples per yielded batch.
      num_epochs: how many times to walk over the data.
    Yields:
      Tuples (data[start:stop, :, :, :], labels[start:stop]). Only whole
      batches are produced; trailing examples that do not fill a batch are
      never yielded.
    """
    total = int(labels.shape[0])
    whole_batches = int(total/batch_size)
    for _ in range(num_epochs):
        for start in range(0, whole_batches * batch_size, batch_size):
            stop = start + batch_size
            yield data[start:stop,:,:,:], labels[start:stop]
# Builds the whole network (three conv layers, two fully connected layers and
# a softmax layer), the loss, the learning-rate schedule and the summaries,
# then runs the resulting tensors in a session.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below (in particular what sits inside the batch loop) cannot
# be read off the text.
def train():
# All ops are created in a fresh graph that is made the default graph.
with tf.Graph().as_default():
global_step = tf.Variable(0)
# Fixed-size placeholders: one batch of (batch_size, 3000, 1, 1) floats and
# batch_size int64 labels.
x_tensor = tf.placeholder(tf.float32, shape=(batch_size, 3000,1,1))
y_tensor = tf.placeholder(tf.int64, shape=(batch_size,))
# NOTE(review): the batch size is hard-coded to 300 here instead of using
# `batch_size`.
for x,y in iterate_batches(data,labels, 300,1):
print('yey!')
# reuse=True is set on this scope and on conv2/conv3/fc4/fc5 (but not on
# softmax_linear); the ValueError described in the question concerns this
# option.
with tf.variable_scope('conv1',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights',
shape=[100,1,1,64],
stddev=5e-2,
wd=0.0)
conv = tf.nn.conv2d(x_tensor, kernel, [1,3,1,1], padding = 'SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv1)
pool1 = tf.nn.max_pool(conv1, ksize=[1,20,1,1], strides=[1,2,1,1], padding='SAME', name='pool1')
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
with tf.variable_scope('conv2',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights', [50,1,64,64], stddev = 5e-2, wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1,3,1,1], padding='SAME')
biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv,biases)
conv2 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv2)
# Unlike after conv1, normalization is applied before pooling here.
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm2')
pool2 = tf.nn.max_pool(norm2, ksize=[1,10,1,1], strides=[1,2,1,1], padding='SAME', name='pool2')
with tf.variable_scope('conv3',reuse=True) as scope:
kernel = _variable_with_weight_decay('weights', [30,1,64,64], stddev = 5e-2, wd=0.0)
conv = tf.nn.conv2d(pool2, kernel, [1,10,1,1], padding='SAME')
biases = _variable_on_cpu('biases',[64], tf.constant_initializer(0.1))
bias = tf.nn.bias_add(conv,biases)
conv3 = tf.nn.relu(bias, name=scope.name)
_activation_summary(conv3)
norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001/9.0, beta = 0.75, name='norm3')
pool3 = tf.nn.max_pool(norm3, ksize=[1,9,1,1], strides=[1,9,1,1], padding='SAME', name='pool3')
with tf.variable_scope('fc4',reuse=True) as scope:
# Move everything into depth so we can perform a single matrix multiply.
reshape = tf.reshape(pool3, [batch_size, -1])
dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights', shape=[dim, 64], stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
fc4 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
_activation_summary(fc4)
with tf.variable_scope('fc5',reuse=True) as scope:
weights = _variable_with_weight_decay('weights', shape=[64, 64],
stddev=0.04, wd=0.004)
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
fc5 = tf.nn.relu(tf.matmul(fc4, weights) + biases, name=scope.name)
_activation_summary(fc5)
# Final linear layer producing NUM_CLASSES logits; no reuse flag on this scope.
with tf.variable_scope('softmax_linear',) as scope:
weights = _variable_with_weight_decay('weights', [64, NUM_CLASSES],
stddev=1/64.0, wd=0.0)
biases = _variable_on_cpu('biases', [NUM_CLASSES],
tf.constant_initializer(0.0))
softmax_linear = tf.add(tf.matmul(fc5, weights), biases, name=scope.name)
_activation_summary(softmax_linear)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(softmax_linear, y_tensor, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
# NOTE(review): the result of tf.add_to_collection is bound to `kupa`, which is
# not used again in this function.
kupa = tf.add_to_collection('losses', cross_entropy_mean)
# Total loss = sum of everything in the 'losses' collection (cross entropy
# plus the weight-decay terms added by _variable_with_weight_decay).
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
#neu
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
loss_averages_op = _add_loss_summaries(loss)
summary_op = tf.merge_all_summaries()
#neu
init = tf.initialize_all_variables()
sess = tf.Session(config = tf.ConfigProto(log_device_placement=False))
sess.run(init)
# Three separate session runs, each fed the same batch x, y.
sess.run([conv, bias, conv1, pool1, norm1, conv2,norm2, pool2, conv3, norm3, pool3,fc4,fc5], feed_dict={x_tensor:x, y_tensor:y})
sess.run([softmax_linear,loss], feed_dict={x_tensor:x, y_tensor:y})
sess.run([lr, loss_averages_op, summary_op], feed_dict={x_tensor:x, y_tensor:y})

The problem is with this line here:
for x,y in iterate_batches(data,labels, 300,1):
This will recreate the graph on every iteration, which is a bad thing to do as it will take up more memory each time (this isn't always the case, but it can happen).
The reuse=True comes in something like this example below when defining the graph.
# First call creates one set of variables.
result1 = my_image_filter(image1)
# Another set of variables is created in the second call.
result2 = my_image_filter(image2)
Tensorflow doesn't know whether or not you want to "reuse" the variables as in should they share the same parameters or not.
In your specific case, by looping you're recreating the parameters each time and telling TensorFlow to simply reuse the variables.
It would be better if you could move the for loop to after the graph creation has already occurred and then you could get rid of the reuse=True everywhere.

Related

Creating several weight tensors for each object in Multi-Object Tracking (MOT) using TensorFlow

I am using TensorFlow V1.10.0 and developing a Multi-Object Tracker based on MDNet. I need to assign a separate weight matrix for each detected object for the fully connected layers in order to get different embedding for each object during online training. I am using this tf.map_fn in order to generate a higher-order weight tensor (n_objects, flattened layer, hidden_units),
'''
# Creates a tf.Variable named 'fc4/kernel' whose initial value is a callable
# drawing a (1024, 512) Xavier-initialized tensor.
# NOTE(review): the initializer produces shape (1024, 512) while the requested
# variable shape is (n_objects.shape[0], 1024, 512) -- confirm which is intended.
def dense_fc4(n_objects):
initializer = lambda: tf.contrib.layers.xavier_initializer()(shape=(1024, 512))
return tf.Variable(initial_value=initializer, name='fc4/kernel',
shape=(n_objects.shape[0], 1024, 512))
W4 = tf.map_fn(dense_fc4, samples_flat)
b4 = tf.get_variable('fc4/bias', shape=512, initializer=tf.zeros_initializer())
fc4 = tf.add(tf.matmul(samples_flat, W4), b4)
fc4 = tf.nn.relu(fc4)
'''
However during execution when I run the session for W4 I get a weight matrix but all having the same values. Any help?
TIA
Here is a workaround, I was able to generate the multiple kernels outside the graph in a for loop and then giving it to the graph:
w6 = []
for n_obj in range(pos_data.shape[0]):
w6.append(tf.get_variable("fc6/kernel-" + str(n_obj), shape=(512, 2),
initializer=tf.contrib.layers.xavier_initializer()))
print("modeling fc6 branches...")
prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6 = build_branches(fc5, w6)
def build_branches(fc5, w6):
    """Build the fc6 branches, their loss, optimizer and metrics.

    Args:
      fc5: output tensor of the previous layer.
      w6: the fc6 kernels (the caller builds one per object).
    Returns:
      Tuple (prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6),
      where initialize_vars holds the trainable "fc6" variables followed by
      the optimizer's slot variables and Adam beta accumulators.
    """
    y = tf.placeholder(tf.int64, [None, None])
    b6 = tf.get_variable('fc6/bias', shape=2, initializer=tf.zeros_initializer())
    fc6 = tf.add(tf.matmul(fc5, w6), b6)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                 logits=fc6)
    loss = tf.reduce_mean(per_example)
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="fc6")
    with tf.variable_scope("", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001, name='adam')
        train_op = optimizer.minimize(loss, var_list=train_vars)
    # Collect the optimizer slots before extending the list, so the lookup only
    # iterates over the trainable variables themselves.
    slot_vars = [optimizer.get_slot(var, slot_name)
                 for slot_name in optimizer.get_slot_names()
                 for var in train_vars]
    # Same list object as train_vars, extended in place.
    initialize_vars = train_vars
    initialize_vars.extend(slot_vars)
    if isinstance(optimizer, tf.train.AdamOptimizer):
        initialize_vars.extend(optimizer._get_beta_accumulators())
    prob = tf.nn.softmax(fc6)
    pred = tf.argmax(prob, 2)
    hits = tf.cast(tf.equal(pred, y), tf.float32)
    accuracy = tf.reduce_mean(hits)
    return prob, train_op, accuracy, loss, pred, initialize_vars, y, fc6

tensor flow error: logits and labels must be broadcastable

I am having the following error displayed while trying to get tensorflow running:
InvalidArgumentError: logits and labels must be broadcastable: logits_size=[30,2] labels_size=[8,2]
Below is my code. I obtained parts of the 1st part of the code from https://blog.francium.tech/build-your-own-image-classifier-with-tensorflow-and-keras-dc147a15e38e and the second from https://www.datacamp.com/community/tutorials/cnn-tensorflow-python. I adopted them to something I am working on where I have some images that belong to 2 different classes. For training, each image class are placed in the same training folder and for testing, each image class is placed in the same testing folder. I figure the error is referring to a mismatch between the logits and label. I have tried tweaking the shapes in the weights and biases as defined in the code below, but this didn't solve the issue. I also tried tampering with the batch size, still no solution. Does anyone have any idea what could cause this error? Could it be how I arranged my training and testing set?
ROOT_PATH = "/my/file/path/images"
train_data_directory = os.path.join(ROOT_PATH, "data/train")
test_data_directory = os.path.join(ROOT_PATH, "data/test")
train_data = train_data_directory
test_data = test_data_directory
def one_hot_label(img):
    """Return the one-hot label encoded in the file name `img`.

    The label is the part of the name before the first '.'. 'A' maps to
    array([1, 0]) and 'B' to array([0, 1]); any other prefix yields an empty
    list. The result is also stored in the module-level global `ohl`.
    """
    global ohl
    prefix = img.split('.')[0]
    encodings = {'A': [1, 0], 'B': [0, 1]}
    ohl = np.array(encodings[prefix]) if prefix in encodings else []
    return ohl
def train_data_with_label():
    """Load every file in `train_data` as a 28x28 grayscale image with label.

    Returns:
      A shuffled list of [image_array, one_hot_label] pairs, the label being
      derived from the file name by one_hot_label.
    """
    train_images = []
    for file_name in tqdm(os.listdir(train_data)):
        pixels = cv2.imread(os.path.join(train_data, file_name), cv2.IMREAD_GRAYSCALE)
        resized = cv2.resize(pixels, (28,28))
        train_images.append([np.array(resized), one_hot_label(file_name)])
    shuffle(train_images)
    return train_images
def test_data_with_label():
    """Load every file in `test_data` as a 28x28 grayscale image with label.

    Returns:
      A shuffled list of [image_array, one_hot_label] pairs, the label being
      derived from the file name by one_hot_label.
    """
    test_images = []
    for file_name in tqdm(os.listdir(test_data)):
        pixels = cv2.imread(os.path.join(test_data, file_name), cv2.IMREAD_GRAYSCALE)
        resized = cv2.resize(pixels, (28,28))
        test_images.append([np.array(resized), one_hot_label(file_name)])
    shuffle(test_images)
    return test_images
training_images = train_data_with_label()
testing_images = test_data_with_label()
#both placeholders are of type float
x = tf.placeholder("float", [None, 28,28,1])
y = tf.placeholder("float", [None, n_classes])
def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias and apply ReLU.

    Args:
      x: input tensor.
      W: convolution kernel.
      b: bias added to the convolution output.
      strides: stride used for both middle dimensions.
    Returns:
      The ReLU-activated tensor.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))
def maxpool2d(x, k=2):
    """Max-pool `x` with a k-by-k window and stride k, using SAME padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window,padding='SAME')
# Kernels for three 3x3 conv layers (1->32->64->128 channels), one dense layer
# (4*4*128 -> 128) and the output layer (128 -> n_classes).
weights = {
'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()),
'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()),
}
# One bias vector per layer above, created with the same Xavier initializer.
# NOTE(review): the 'out' bias hard-codes shape 2, whereas the 'out' kernel
# uses n_classes -- these only agree when n_classes == 2.
biases = {
'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('B4', shape=(2), initializer=tf.contrib.layers.xavier_initializer()),
}
def conv_net(x, weights, biases):
    """Run `x` through three conv+pool stages, one dense layer and the output.

    Args:
      x: input image tensor.
      weights: dict with kernels 'wc1', 'wc2', 'wc3', 'wd1' and 'out'.
      biases: dict with biases 'bc1', 'bc2', 'bc3', 'bd1' and 'out'.
    Returns:
      The class-score tensor; its static shape is also printed.
    """
    # Each stage is conv2d (conv + bias + ReLU) followed by 2x2 max pooling,
    # which down-samples the spatial size (28 -> 14 -> 7 -> 4 for a 28x28 input).
    pooled = x
    for w_key, b_key in (('wc1', 'bc1'), ('wc2', 'bc2'), ('wc3', 'bc3')):
        pooled = maxpool2d(conv2d(pooled, weights[w_key], biases[b_key]), k=2)
    # Flatten the third stage's output to the input width of the dense kernel.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(pooled, [-1, flat_width])
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
    # Output layer: linear projection to class scores.
    out = tf.add(tf.matmul(hidden, weights['out']), biases['out'])
    print(out.shape)
    return out
#print(out.shape)
pred = conv_net(x, weights, biases)
#pred.shape
#labelsa = tf.constant(1., shape=y.shape)
#logsa = tf.constant(1., shape=pred.shape)
#labels = labels + tf.zeros_like(logsa)
print(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
print(y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
summary_writer = tf.summary.FileWriter('./Output', sess.graph)
for i in range(training_iters):
#print('here')
for batch in range(len(train_X)//batch_size):
print('here')
#offset = (batch * batch_size) % (train_Y.shape[0] - batch_size)
batch_x = train_X[batch*batch_size:min((batch+1)*batch_size,len(train_X))]
batch_y = train_Y[batch*batch_size:min((batch+1)*batch_size,len(train_Y))]
# Run optimization op (backprop).
# Calculate batch loss and accuracy
print(batch_y.shape)
opt = sess.run(optimizer, feed_dict={x: batch_x,
y: batch_y})
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y})
print("Iter " + str(i) + ", Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
print("Optimization Finished!")
# Calculate accuracy for all 10000 mnist test images
test_acc,valid_loss = sess.run([accuracy,cost], feed_dict={x: test_X,y: test_Y})
train_loss.append(loss)
test_loss.append(valid_loss)
train_accuracy.append(acc)
test_accuracy.append(test_acc)
print("Testing Accuracy:","{:.5f}".format(test_acc))
summary_writer.close()

Providing output from one model to another model

I want to provide the output from one model (f) into another model (c). The following code works
features_ = sess.run(f.features, feed_dict={x:x_, y:y_, dropout:1.0, training:False})
sess.run(c.optimize, feed_dict={x:x_, y:y_, features:features_, dropout:1.0, training:False})
c only needs features_ and y_. It does not need x_. However, if I try to remove x_ as an input, i.e.,
feed_dict={y:y_, features:features_}
I get the following error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,28,28,1]
[[Node: Placeholder = Placeholderdtype=DT_FLOAT, shape=[?,28,28,1], _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Is there a reason for this? features_ is a numpy ndarray, so it doesn't seem to be a tensor type or anything like that.
Here is the code for f:
# Convolutional feature model: three conv layers, two dense layers and a
# linear output, with its own loss, accuracy and Adagrad training op.
class ConvModelSmall(object):
def __init__(self, x, y, settings, num_chan, num_features, lr, reg, dropout, training, scope):
""" init the model with hyper-parameters etc """
self.x = x
self.y = y
self.dropout = dropout
self.training = training
initializer = tf.contrib.layers.xavier_initializer(uniform=False)
# NOTE(review): `dims` is passed positionally after keyword arguments (a
# SyntaxError in Python) and is not defined anywhere in this listing --
# presumably the author abbreviated the real call. Weights and biases are
# built by textually identical calls here.
self.weights = get_parameters(scope=scope, initializer=initializer, dims)
self.biases = get_parameters(scope=scope, initializer=initializer, dims)
self.features = self.feature_model()
self.acc = settings.acc(self.features, self.y)
self.loss = settings.loss(self.features, self.y) + reg * reg_loss_fn(self.weights)
# UPDATE_OPS is read without a scope filter, so the training op below is
# made to depend on every update op registered in the graph at this point.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
self.optimize = tf.train.AdagradOptimizer(lr).minimize(self.loss)
# Builds the forward pass from self.x to the output tensor.
def feature_model(self):
conv1 = conv2d('conv1', self.x, self.weights['wc1'], self.biases['bc1'], 2, self.training, self.dropout)
conv2 = conv2d('conv2', conv1, self.weights['wc2'], self.biases['bc2'], 2, self.training, self.dropout)
conv3 = conv2d('conv3', conv2, self.weights['wc3'], self.biases['bc3'], 2, self.training, self.dropout)
# Flatten conv3 to the input width of the first dense kernel.
dense1_reshape = tf.reshape(conv3, [-1, self.weights['wd1'].get_shape().as_list()[0]])
dense1 = fc_batch_relu(dense1_reshape, self.weights['wd1'], self.biases['bd1'], self.training, self.dropout)
dense2 = fc_batch_relu(dense1, self.weights['wd2'], self.biases['bd2'], self.training, self.dropout)
out = tf.matmul(dense2, self.weights['wout']) + self.biases['bout']
return out
Here is the code for c:
class LinearClassifier(object):
    """Linear classifier on top of batch-normalized input features."""

    def __init__(self, features, y, training, num_features, num_classes, lr, reg, scope=""):
        """Create the variables, the loss and the gradient-descent training op.

        Args:
          features: input feature tensor.
          y: integer class labels.
          training: flag forwarded to tf.layers.batch_normalization.
          num_features: number of input features.
          num_classes: number of output classes.
          lr: learning rate for gradient descent.
          reg: multiplier for the L2 penalty on W.
          scope: scope handed to get_scope_variable.
        """
        self.features = features
        self.y = y
        self.num_features = num_features
        self.num_classes = num_classes
        xavier = tf.contrib.layers.xavier_initializer(uniform=False)
        self.W = get_scope_variable(scope=scope, var="W", shape=[num_features, num_classes], initializer=xavier)
        self.b = get_scope_variable(scope=scope, var="b", shape=[num_classes], initializer=xavier)
        normalized = tf.layers.batch_normalization(self.features, training=training)
        scores = tf.matmul(normalized, self.W) + self.b
        data_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=scores))
        self.loss = data_loss + reg * tf.nn.l2_loss(self.W)
        # UPDATE_OPS is read without a scope filter, so every update op in the
        # graph at this point becomes a dependency of the training op.
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(pending_updates):
            self.optimize = tf.train.GradientDescentOptimizer(lr).minimize(self.loss)
The devil is probably in these lines:
update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS )
with tf.control_dependencies(update_ops):
self.optimize = tf.train.GradientDescentOptimizer( lr ).minimize( self.loss )
By the time you define c, f is already defined, so when you say update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS ) it will collect all the update ops in the current graph. That will include the ops related to f, x within it.
Then with tf.control_dependencies(update_ops): means "you should do the following only after all update_ops are executed", including having given a value to x. But there is no value for x, so the error happens.
To get around this, you can either separate the two networks into two different tf.Graphs, or, probably easier, when you get the update_ops you should filter them by scope in the tf.get_collection() method. For that to work, you should add tf.name_scopes to your network classes ConvModelSmall and LinearClassifier.

Fully Convolutional Network, Training Error

I apologize that I'm not good at English.
I'm trying to build my own Fully Convolutional Network using TensorFlow.
But I have difficulties on training this model with my own image data, whereas the MNIST data worked properly.
Here is my FCN model code (not using a pre-trained or pre-built model):
import tensorflow as tf
import numpy as np
Loading MNIST Data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
images_flatten = tf.placeholder(tf.float32, shape=[None, 784])
images = tf.reshape(images_flatten, [-1,28,28,1]) # CNN deals with 3 dimensions
labels = tf.placeholder(tf.float32, shape=[None, 10])
keep_prob = tf.placeholder(tf.float32) # Dropout Ratio
Convolutional Layers
# Conv. Layer #1
W1 = tf.Variable(tf.truncated_normal([3, 3, 1, 4], stddev = 0.1))
b1 = tf.Variable(tf.truncated_normal([4], stddev = 0.1))
FMA = tf.nn.conv2d(images, W1, strides=[1,1,1,1], padding='SAME')
# FMA stands for Fused Multiply Add, which means convolution
RELU = tf.nn.relu(tf.add(FMA, b1))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #2
W2 = tf.Variable(tf.truncated_normal([3, 3, 4, 8], stddev = 0.1))
b2 = tf.Variable(tf.truncated_normal([8], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W2, strides=[1,1,1,1], padding='SAME')
RELU = tf.nn.relu(tf.add(FMA, b2))
POOL = tf.nn.max_pool(RELU, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Conv. Layer #3
W3 = tf.Variable(tf.truncated_normal([7, 7, 8, 16], stddev = 0.1))
b3 = tf.Variable(tf.truncated_normal([16], stddev = 0.1))
FMA = tf.nn.conv2d(POOL, W3, strides=[1,1,1,1], padding='VALID')
RELU = tf.nn.relu(tf.add(FMA, b3))
# Dropout
Dropout = tf.nn.dropout(RELU, keep_prob)
# Conv. Layer #4
W4 = tf.Variable(tf.truncated_normal([1, 1, 16, 10], stddev = 0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev = 0.1))
FMA = tf.nn.conv2d(Dropout, W4, strides=[1,1,1,1], padding='SAME')
LAST_RELU = tf.nn.relu(tf.add(FMA, b4))
Summary: [Conv-ReLU-Pool] - [Conv-ReLU-Pool] - [Conv-ReLU] - [Dropout] - [Conv-ReLU]
Define Loss, Accuracy
prediction = tf.squeeze(LAST_RELU)
# Because FCN returns (1 x 1 x class_num) in training
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, labels))
# First arg is 'logits=' and the other one is 'labels='
optimizer = tf.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
label_max = tf.argmax(labels, 1)
pred_max = tf.argmax(prediction, 1)
correct_pred = tf.equal(pred_max, label_max)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Training Model
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10000):
image_batch, label_batch = mnist.train.next_batch(100)
sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
if i % 10 == 0:
tr = sess.run([loss, accuracy], feed_dict={images: image_batch, labels: label_batch, keep_prob: 1.0})
print("Step %d, Loss %g, Accuracy %g" % (i, tr[0], tr[1]))
Loss: 0.784 (Approximately)
Accuracy: 94.8% (Approximately)
The problem is that training this model with MNIST data worked very well, but with my own data the loss is always the same (0.6319) and the output layer is always 0.
There is no difference in the code except for the third convolutional layer's filter size. This filter must have the same width and height as its input, which has been down-sampled by the previous pooling layers. That's why the filter size in this layer is [7,7] in the MNIST version.
What is wrong with my model?..
The only different code between two cases (MNIST, my own data) is:
Placeholder
My own data has (128 x 64 x 1) and the label is 'eyes', 'not_eyes'
images = tf.placeholder(tf.float32, [None, 128, 64, 1])
labels = tf.placeholder(tf.int32, [None, 2])
3rd Convolutional Layer
W3 = tf.Variable(tf.truncated_normal([32, 16, 8, 16], stddev = 0.1))
Feeding (Batch)
image_data, label_data = input_data.get_batch(TRAINING_FILE, 10)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(10000):
image_batch, label_batch = sess.run([image_data, label_data])
sess.run(train, feed_dict={images: image_batch, labels: label_batch, keep_prob: 0.8})
if i % 10 == 0: ... # Validation part is almost same, too...
coord.request_stop()
coord.join(threads)
Here "input_data" is an another python file in the same directory, and "get_batch(TRAINING_FILE, 10)" is the function that returns batch data. The code is:
def get_input_queue(txtfile_name):
    """Build a shuffled slice-input queue from a text listing file.

    Each non-blank line of the file is split on ',' and newline; column 0 is
    used as the image path and column 2 as the integer label.

    Args:
      txtfile_name: path of the listing file.
    Returns:
      The result of tf.train.slice_input_producer over [images, labels].
    Raises:
      IndexError / ValueError: if a non-blank line lacks a third column or the
        label is not an integer.
    """
    images = []
    labels = []
    # The context manager closes the listing file even when a line fails to
    # parse; the original left the handle open.
    with open(txtfile_name, 'r') as listing:
        for line in listing:
            # A blank line (e.g. a trailing newline at end of file) carries no
            # record and would otherwise raise IndexError on cols[2].
            if not line.strip():
                continue
            cols = re.split(',|\n', line)
            labels.append(int(cols[2]))
            images.append(tf.image.decode_jpeg(tf.read_file(cols[0]), channels = 1))
    input_queue = tf.train.slice_input_producer([images, labels], shuffle = True)
    return input_queue
def get_batch(txtfile_name, batch_size):
    """Return a batch of images and their one-hot labels from a listing file.

    Args:
      txtfile_name: path handed to get_input_queue.
      batch_size: number of examples per batch.
    Returns:
      Tuple (batch_image, batch_label_one_hot); each image is reshaped to
      [128, 64, 1] and labels are one-hot encoded with depth 2.
    """
    queue = get_input_queue(txtfile_name)
    image = tf.reshape(queue[0], [128, 64, 1])
    label = queue[1]
    batch_image, batch_label = tf.train.batch([image, label], batch_size)
    return batch_image, tf.one_hot(tf.to_int64(batch_label), 2, on_value=1.0, off_value=0.0)
It seems not to have any problem .... :( Please Help me..!!
Are your inputs scaled appropriately? The JPEGs are in the [0, 255] range and need to be scaled to [-1, 1]. You can try:
image = tf.reshape(image, [128, 64, 1])
image = tf.scalar_mul((1.0/255), image)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
What accuracy are you getting with your model for MNIST? It would be helpful if you posted the code. Are you using the trained model to evaluate the output for your own data?
A general suggestion on setting up the convolution model is provided here.
Here is the model suggestion according to the article :-
INPUT -> [[CONV -> RELU]*N -> POOL?]*M -> [FC -> RELU]*K -> FC
Having more than one CONV->RELU pair before pooling helps the network learn complex features. Try N=2 instead of 1.
Some other suggestions:
While you are preparing your data reduce it to smaller size than 128x64. Try same size as the MNIST data ..
image = tf.reshape(image, [28, 28, 1])
If your eye/noeye image is color, then convert it to greyscale and normalize the values to unity range. You can do this using numpy or tf, here is how using numpy
grayscale-->
img = np.dot(np.array(img, dtype='float32'), [[0.2989],[0.5870],[0.1140]])
normalize-->
mean = np.mean(img, dtype='float32')
std = np.std(img, dtype='float32', ddof=1)
if std < 1e-4: std = 1.
img = (img - mean) / std

Tensorflow training got stuck after some steps, how to investigate?

I have a python script to train a Tensorflow model similar to the one in CIFAR-10 tutorial. I have 20500 training examples and am using 128 examples per batch. I set 1,000,000 as the max number of steps. However after about 164,000 steps, the python script seems stuck somewhere. Is there any way to find out where the script is stuck? My last resort would be using Ctrl-C to terminate the process and force it to print out a backtrace. But I wonder if there are other things I should check before I kill the process.
Here's the train loop:
# Builds the input pipeline, network, loss and training op in a fresh graph,
# then runs up to maxSteps training steps with periodic logging, summaries and
# checkpoints written to workingDir.
def train(trainingData, batchSize, workingDir, maxSteps):
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
image, label = readData(trainingData)
# Shuffle queue sizing: keep at least 5000 elements after each dequeue, with
# room for three extra batches on top.
minAfterDequeue = 5000
capacity = minAfterDequeue + 3 * batchSize
imageBatch, labelBatch = tf.train.shuffle_batch([image, label], batch_size=batchSize, capacity=capacity, min_after_dequeue=minAfterDequeue)
#labelBatch = tf.reshape(labelBatch, [batchSize, 1])
#tf.image_summary('images', imageBatch)
#tf.histogram_summary('labels', tf.cast(labelBatch, tf.float32))
logits = network.inference(imageBatch, 0.5)
#floatLabel = tf.cast(labelBatch, tf.float32)
#cross_entropy_per_example = tf.nn.softmax_cross_entropy_with_logits(logits, floatLabel)
loss, cross_entropy = network.loss(logits, labelBatch)
train_op = network.train(loss, global_step, batchSize)
# Create a saver
saver = tf.train.Saver(tf.all_variables())
summary_op = tf.merge_all_summaries()
session = tf.Session()
init = tf.initialize_all_variables()
session.run(init)
# NOTE(review): queue runners are started without a tf.train.Coordinator and
# the returned thread list is not kept.
tf.train.start_queue_runners(sess=session)
summary_writer = tf.train.SummaryWriter(workingDir, session.graph_def)
# xrange is the Python 2 spelling of range.
for step in xrange(maxSteps):
start_time = time.time()
#l, sm, ce = session.run([floatLabel, logits, cross_entropy_per_example])
#print l
#print sm
#print ce
_, loss_value = session.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
# Every 10 steps: print throughput and loss.
if step % 10 == 0:
examples_per_sec = batchSize / duration
# NOTE(review): the opening '(' in this format string is never closed.
format_str = "%s: step %d, loss = %e (%.1f examples/sec; %.3f sec/batch"
print (format_str % (datetime.now(), step, loss_value, examples_per_sec, float(duration)))
# Every 100 steps: evaluate and record the merged summaries.
if step % 100 == 0:
summary_str = session.run(summary_op)
summary_writer.add_summary(summary_str, step)
# Every 1000 steps and on the final step: write a checkpoint.
if step % 1000 == 0 or (step + 1) == maxSteps:
checkpoint_path = os.path.join(workingDir, 'model.ckpt')
saver.save(session, checkpoint_path, global_step = step)
And here's the various functions used to construct the graph:
import re
import tensorflow as tf
TOWER_NAME="tower"
NUM_EXAMPLES_PER_EPOCH = 50000
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.95 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.01 # Initial learning rate.
def _activation_summary(x):
    """Register activation summaries for tensor `x`.

    Adds a histogram of the activations and a scalar measuring their sparsity
    (fraction of zeros).

    Args:
      x: Tensor
    Returns:
      nothing
    """
    # Strip '<TOWER_NAME>_<digits>/' from the op name so summaries from
    # multi-GPU towers share one tag on tensorboard.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    tag = re.sub(tower_prefix, '', x.op.name)
    sparsity = tf.nn.zero_fraction(x)
    tf.histogram_summary(tag + '/activations', x)
    tf.scalar_summary(tag + '/sparsity', sparsity)
def _variable_on_cpu(name, shape, initializer):
    """Create or fetch a float32 variable under the '/cpu:0' device scope.

    Args:
      name: name of the variable
      shape: list of ints
      initializer: initializer for Variable
    Returns:
      Variable Tensor
    """
    cpu_scope = tf.device('/cpu:0')
    with cpu_scope:
        created = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)
    return created
def _variable_with_weight_decay(name, shape, stddev, wd=None):
    """Create a truncated-normal-initialized variable, optionally L2-penalized.

    When `wd` is given, the variable's L2 loss scaled by `wd` is added to the
    'losses' collection under the op name 'weight_loss'.

    Args:
      name: name of the variable
      shape: list of ints
      stddev: standard deviation of the truncated Gaussian initializer
      wd: weight-decay coefficient multiplying the L2 loss. If None, no
          weight-decay term is registered for this Variable.
    Returns:
      Variable Tensor
    """
    initializer = tf.truncated_normal_initializer(stddev=stddev)
    var = _variable_on_cpu(name, shape, initializer)
    if wd is None:
        return var

    l2_penalty = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', l2_penalty)
    return var
def _conv_relu(inputs, scope_name, kernel_shape):
    """Build one stride-1, SAME-padded conv + bias + ReLU layer in its own scope.

    Args:
      inputs: 4-D input Tensor for tf.nn.conv2d.
      scope_name: variable scope (and output op) name, e.g. 'conv1'.
      kernel_shape: [height, width, in_channels, out_channels] of the kernel.
    Returns:
      The ReLU activation Tensor.
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay('weights', shape=kernel_shape, stddev=5e-2)
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
        # One bias per output channel.
        biases = _variable_on_cpu('biases', [kernel_shape[-1]], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(activation)
    return activation


def inference(images, dropout):
    """Build the network: three conv/pool stages, two dense layers, linear output.

    Every layer creates its variables through tf.get_variable inside a fixed
    variable scope ('conv1', 'conv2', 'conv3', 'full4', 'full5',
    'softmax_linear'), so calling this function twice in the same graph
    requires the caller to enable variable reuse.

    Args:
      images: 4-D Tensor fed to the first convolution; the first kernel has
          one input channel. The batch dimension may be statically unknown;
          the remaining dimensions must be static.
      dropout: second argument handed to tf.nn.dropout after each dense layer
          (presumably the keep probability -- confirm against the installed
          TensorFlow version).
    Returns:
      Tensor of unnormalized class scores with 2 columns.
    Raises:
      ValueError: if the non-batch dimensions of the last pooling output are
          not statically known, since the first dense weight matrix cannot be
          sized in that case.
    """
    # conv1 / pool1
    conv1 = _conv_relu(images, 'conv1', [5, 5, 1, 32])
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
    # conv2 / pool2
    conv2 = _conv_relu(pool1, 'conv2', [3, 3, 32, 64])
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    # conv3 / pool3
    conv3 = _conv_relu(pool2, 'conv3', [3, 3, 64, 64])
    pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool3')

    # fully connected 4
    with tf.variable_scope('full4') as scope:
        # Size the flattened feature vector from the static non-batch dims and
        # let reshape infer the batch dimension, so that a placeholder with an
        # unknown batch size works too.
        feature_dims = pool3.get_shape().as_list()[1:]
        if None in feature_dims:
            raise ValueError(
                'inference() needs statically known non-batch dimensions, got %s'
                % pool3.get_shape())
        dim = 1
        for extent in feature_dims:
            dim *= extent
        flattened = tf.reshape(pool3, [-1, dim])
        weights = _variable_with_weight_decay('weights', shape=[dim, 256], stddev=5e-2)
        biases = _variable_on_cpu('biases', [256], tf.constant_initializer(0.1))
        full4 = tf.nn.relu(tf.matmul(flattened, weights) + biases, name=scope.name)
        full4_dropout = tf.nn.dropout(full4, dropout)
        # The summary is taken before dropout.
        _activation_summary(full4)

    # fully connected 5
    with tf.variable_scope('full5') as scope:
        weights = _variable_with_weight_decay('weights', [256, 128], stddev=5e-2)
        biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
        full5 = tf.nn.relu(tf.matmul(full4_dropout, weights) + biases, name=scope.name)
        full5_dropout = tf.nn.dropout(full5, dropout)
        # The summary is taken before dropout.
        _activation_summary(full5)

    # Linear output layer; the softmax itself is applied inside the loss.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [128, 2], stddev=1/128.0)
        biases = _variable_on_cpu('biases', [2], tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(full5_dropout, weights), biases, name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear
def loss(logits, labels):
    """Build the total loss: mean softmax cross-entropy plus collected losses.

    The mean cross-entropy is added to the 'losses' collection, and the total
    is the sum of everything in that collection at call time.

    Args:
      logits: unnormalized class scores.
      labels: targets, cast to float32 before use (presumably one row of
          class probabilities per example -- confirm against the caller).
    Returns:
      A pair (total_loss, cross_entropy_mean) of scalar Tensors.
    """
    float_labels = tf.cast(labels, tf.float32)
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        logits, float_labels, name='cross_entropy_per_example')
    mean_cross_entropy = tf.reduce_mean(per_example, name='cross_entropy')
    tf.add_to_collection('losses', mean_cross_entropy)

    total = tf.add_n(tf.get_collection('losses'), name='total_loss')
    return total, mean_cross_entropy
def _add_loss_summaries(total_loss):
    """Track moving averages of the losses and summarize raw and averaged values.

    Covers every tensor in the 'losses' collection plus `total_loss`.

    Args:
      total_loss: Total loss from loss().
    Returns:
      loss_averages_op: op that updates the moving averages of the losses.
    """
    averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked = tf.get_collection('losses') + [total_loss]
    loss_averages_op = averager.apply(tracked)

    for tensor in tracked:
        tag = tensor.op.name
        # The unsmoothed value gets the ' (raw)' suffix; the moving average
        # takes the plain loss name.
        tf.scalar_summary(tag + ' (raw)', tensor)
        tf.scalar_summary(tag, averager.average(tensor))

    return loss_averages_op
def train(loss, step, batchSize):
    """Build the training op: SGD with a staircase-decayed learning rate.

    Also adds summaries for the learning rate, the losses, every trainable
    variable and every non-None gradient, and maintains moving averages of
    the trainable variables.

    Args:
      loss: total loss Tensor to minimize.
      step: global step, used for the learning-rate schedule, incremented by
          the gradient update, and passed to the variable moving average.
      batchSize: number of examples per batch, used to size the decay period.
    Returns:
      train_op: a no-op named 'train' that runs only after the gradient
          update and the variable moving-average update.
    """
    # NOTE(review): with int operands '/' floors on Python 2 but is true
    # division on Python 3, so decay_steps can differ between the two.
    batches_per_epoch = NUM_EXAMPLES_PER_EPOCH / batchSize
    decay_steps = int(batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Exponentially decayed learning rate, dropping in discrete steps.
    lr = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        step,
        decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    tf.scalar_summary('learning_rate', lr)

    loss_averages_op = _add_loss_summaries(loss)

    # Gradients are computed only after the loss averages have been updated.
    with tf.control_dependencies([loss_averages_op]):
        optimizer = tf.train.GradientDescentOptimizer(lr)
        grads_and_vars = optimizer.compute_gradients(loss)

    apply_gradient_op = optimizer.apply_gradients(grads_and_vars, global_step=step)

    # Histograms of the trainable variables.
    for variable in tf.trainable_variables():
        tf.histogram_summary(variable.op.name, variable)

    # Histograms of the gradients that exist.
    for gradient, variable in grads_and_vars:
        if gradient is None:
            continue
        tf.histogram_summary(variable.op.name + '/gradients', gradient)

    # Moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variable_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op