Add a summary of accuracy of the whole train/test dataset in Tensorflow

I am trying to use TensorBoard to visualize my training procedure. My goal is that, every time an epoch completes, I test the network's accuracy on the whole validation dataset and store that accuracy in a summary file, so that I can visualize it in TensorBoard.
I know TensorFlow has a summary_op to do this; however, it seems to work for only a single batch when running sess.run(summary_op). I need to calculate the accuracy for the whole dataset. How?
Is there an example that does this?

Define a tf.scalar_summary that accepts a placeholder:
accuracy_value_ = tf.placeholder(tf.float32, shape=())
accuracy_summary = tf.scalar_summary('accuracy', accuracy_value_)
Then calculate the accuracy over the whole dataset (define a routine that computes the accuracy for every batch in the dataset and takes the mean value) and save it into a Python variable; let's call it va.
Once you have the value of va, just run the accuracy_summary op, feeding the accuracy_value_ placeholder:
sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
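For instance, here is a minimal sketch of that routine. It assumes a per-batch accuracy op, X/y placeholders, an MNIST-style dataset object and a summary writer; all of these names stand in for your own graph. With equal-sized batches, the mean of the per-batch accuracies equals the whole-dataset accuracy:
def whole_set_accuracy(sess, data, batch_size=100):
    # average the per-batch accuracy over every batch in the dataset
    acc_values = []
    for _ in range(data.num_examples // batch_size):
        batch_x, batch_y = data.next_batch(batch_size)
        acc_values.append(sess.run(accuracy, feed_dict={X: batch_x, y: batch_y}))
    return sum(acc_values) / len(acc_values)

va = whole_set_accuracy(sess, dataset.validation)
summary = sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
writer.add_summary(summary, global_step=epoch)  # epoch: your training loop counter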

As an example, I implemented a naive one-layer model to classify the MNIST dataset and visualize the validation accuracy in TensorBoard; it works for me.
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import os
# number of epoch
num_epoch = 1000
model_dir = '/tmp/tf/onelayer_model/accu_info'
# mnist dataset location, change if you need
data_dir = '../data/mnist'
# load MNIST dataset without one hot
dataset = read_data_sets(data_dir, one_hot=False)
# Create placeholder for input images X and labels y
X = tf.placeholder(tf.float32, [None, 784])
# one_hot = False
y = tf.placeholder(tf.int32)
# One layer model graph
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.nn.relu(tf.matmul(X, W) + b)
init = tf.initialize_all_variables()
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)
# loss function
loss = tf.reduce_mean(cross_entropy)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
_, top_1_op = tf.nn.top_k(logits)
top_1 = tf.reshape(top_1_op, shape=[-1])
correct_classification = tf.cast(tf.equal(top_1, y), tf.float32)
# accuracy function
acc = tf.reduce_mean(correct_classification)
# define info that is used in SummaryWriter
acc_summary = tf.scalar_summary('valid_accuracy', acc)
valid_summary_op = tf.merge_summary([acc_summary])
with tf.Session() as sess:
    # initialize all the variables
    sess.run(init)
    print("Writing Summaries to %s" % model_dir)
    train_summary_writer = tf.train.SummaryWriter(model_dir, sess.graph)
    # load validation dataset
    valid_x = dataset.validation.images
    valid_y = dataset.validation.labels
    for epoch in xrange(num_epoch):
        batch_x, batch_y = dataset.train.next_batch(100)
        feed_dict = {X: batch_x, y: batch_y}
        _, acc_value, loss_value = sess.run(
            [train_op, acc, loss], feed_dict=feed_dict)
        vsummary = sess.run(valid_summary_op,
                            feed_dict={X: valid_x,
                                       y: valid_y})
        # Write validation accuracy summary
        train_summary_writer.add_summary(vsummary, epoch)

Batching your validation set is possible if you are using the tf.metrics ops, which use internal counters. Here is a simplified example:
model = create_model()
tf.summary.scalar('cost', model.cost_op)
acc_value_op, acc_update_op = tf.metrics.accuracy(labels, predictions)
summary_common = tf.summary.merge_all()
summary_valid = tf.summary.merge([
    tf.summary.scalar('accuracy', acc_value_op),
    # other metrics here...
])
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(logs_path + '/train',
                                         sess.graph)
    valid_writer = tf.summary.FileWriter(logs_path + '/valid')
While training, only write the common summary using your train-writer:
summary = sess.run(summary_common)
train_writer.add_summary(summary, tf.train.global_step(sess, gstep_op))
train_writer.flush()
After every validation, write both summaries using the valid-writer:
gstep, summaryc, summaryv = sess.run([gstep_op, summary_common, summary_valid])
valid_writer.add_summary(summaryc, gstep)
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()
When using tf.metrics, don't forget to reset the internal counters (local variables) before every validation step.
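A sketch of that reset-then-accumulate loop, assuming the ops defined above plus hypothetical x/y placeholders and a batch helper of your own:
# tf.metrics keeps its counters in local variables; reset them first
sess.run(tf.local_variables_initializer())
# accumulate over the whole validation set, batch by batch
for batch_x, batch_y in iterate_validation_batches():  # hypothetical helper
    sess.run(acc_update_op, feed_dict={x: batch_x, y: batch_y})
# acc_value_op now reflects every batch seen since the reset, so the
# accuracy written below covers the whole validation set
gstep, summaryc, summaryv = sess.run([gstep_op, summary_common, summary_valid])
valid_writer.add_summary(summaryc, gstep)
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()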

Related

In tensorflow 1, when the loss function is defined with operations on Tensors, is the model really trained?

First, I'm sorry, but it's not possible to reproduce this problem in a few lines, as the model involved is a very complex network.
But here is an idea of the code:
def return_iterator(data, nb_epochs, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(data)
    dataset = dataset.repeat(nb_epochs).batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    yy = iterator.get_next()
    return tf.cast(yy, tf.float32)

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    y_pred = complex_model.autoencode(train)
    y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)
    nb_epochs = 10
    batch_size = 64
    y_real = return_iterator(train, nb_epochs, batch_size)
    y_pred = return_iterator(y_pred, nb_epochs, batch_size)
    res_equal = 1. - tf.reduce_mean(tf.abs(y_pred - y_real), [1, 2, 3])
    loss = 1 - tf.reduce_sum(res_equal, axis=0)
    opt = tf.train.AdamOptimizer().minimize(loss)
    tf.global_variables_initializer().run()
    for epoch in range(0, nb_epochs):
        _, d_loss = sess.run([opt, loss])
To define the loss, I must use operations like tf.reduce_mean and tf.reduce_sum, and these operations only accept Tensors as input.
My question is: with this code, will the complex_model autoencoder be trained during the training? (Even though here it's just used to output the predictions used to compute the loss.)
Thank you
p.s: I am using TF1.15 (and I cannot use another version)

Tensorflow: How to predict a single image from the model trained?

I am new to tensorflow and I am trying to build an image classifier. I have successfully created the model, and I am trying to predict a single image after restoring the model. I have gone through various tutorials (https://github.com/sankit1/cv-tricks.com/blob/master/Tensorflow-tutorials/tutorial-2-image-classifier/predict.py) but I can't figure out the feed_dict part in my code. I am stuck at the predict function after loading the saved model. Can someone please help me and tell me what to do after loading all the variables from the saved model?
This is the train function, which returns the parameters and saves them in a model checkpoint.
def trainModel(train, test, learning_rate=0.0001, num_epochs=2, minibatch_size=32, graph_filename='costs'):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
    Input:
        train : training set
        test : test set
        learning_rate : learning rate
        num_epochs : number of epochs
        minibatch_size : size of minibatch
        print_cost : True to print the cost every epoch
    Returns:
        parameters : parameters learnt by the model
    """
    ops.reset_default_graph()  # for rerunning the model without resetting tf vars
    # input and output shapes
    (n_x, m) = train.images.T.shape
    n_y = train.labels.T.shape[0]
    costs = []  # var for storing the costs for later use
    # create placeholders
    X, Y = placeholderCreator(n_x, n_y)
    parameters = paramInitializer()
    # Forward propagation
    Z3 = forwardPropagation(X, parameters)
    # Cost function
    cost = costCalc(Z3, Y)
    # Backpropagation using adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    # Initialize tf variables
    init = tf.global_variables_initializer()
    minibatch_size = 32
    # Start session to compute Tensorflow graph
    with tf.Session() as sess:
        # Run initialization
        sess.run(init)
        for epoch in range(num_epochs):  # Training loop
            epoch_cost = 0.
            num_minibatches = int(m / minibatch_size)
            for i in range(num_minibatches):
                # Get next batch of training data and labels
                minibatch_X, minibatch_Y = train.next_batch(minibatch_size)
                # Execute optimizer and cost function
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X.T, Y: minibatch_Y.T})
                # Update epoch cost
                epoch_cost += minibatch_cost / num_minibatches
        saver = tf.train.Saver()
        # Save parameters
        parameters = sess.run(parameters)
        saver.save(sess, "~/trained-model.ckpt")
    return parameters
And this is my predict function, where I am trying to predict an image. I have converted that image into MNIST format for ease of use (predicting_data). I load the model that I saved and apply a softmax to the output of the 3rd layer (the final output).
def predict():
    train = predicting_data.train
    (n_x, m) = train.images.T.shape
    n_y = train.labels.T.shape[0]
    X, Y = placeholderCreator(n_x, n_y)
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph('~/trained-model.ckpt.meta')
        new_saver.restore(sess, '~/trained-model.ckpt')
        W1 = tf.get_default_graph().get_tensor_by_name('W1:0')
        b1 = tf.get_default_graph().get_tensor_by_name('b1:0')
        W2 = tf.get_default_graph().get_tensor_by_name('W2:0')
        b2 = tf.get_default_graph().get_tensor_by_name('b2:0')
        W3 = tf.get_default_graph().get_tensor_by_name('W3:0')
        b3 = tf.get_default_graph().get_tensor_by_name('b3:0')
        # forward propagation
        Z1 = tf.add(tf.matmul(W1, X), b1)
        A1 = tf.nn.relu(Z1)
        Z2 = tf.add(tf.matmul(W2, A1), b2)
        A2 = tf.nn.relu(Z2)
        Z3 = tf.add(tf.matmul(W3, A2), b3)
        y_pred = tf.nn.softmax(Z3)  # what to do after this????
        cost = sess.run(y_pred, feed_dict={X: train.images.T})
Thank you in advance!
As vijay says in his comment:
Your predict part is not right; you need to get the input and predict tensors from the saved graph using the get_tensor_by_name() function and then use them in your sess.run call.
If you look at this post, it covers a similar problem and has some code examples.
In your code, you can pass 1 to the next_batch method and get just one image.
minibatch_X, minibatch_Y = train.next_batch(1)
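For completeness, a minimal sketch of such a predict step. It assumes the input placeholder and the last linear layer were created with name='X' and name='Z3' when the training graph was built; those names are hypothetical, so use whatever names your placeholderCreator and forwardPropagation actually assigned:
import numpy as np
import tensorflow as tf

n_x = 784  # input size used at training time

with tf.Session() as sess:
    # restore the graph structure and the trained weights
    saver = tf.train.import_meta_graph('~/trained-model.ckpt.meta')
    saver.restore(sess, '~/trained-model.ckpt')
    graph = tf.get_default_graph()
    X = graph.get_tensor_by_name('X:0')    # input placeholder (hypothetical name)
    Z3 = graph.get_tensor_by_name('Z3:0')  # last linear layer (hypothetical name)
    y_pred = tf.nn.softmax(Z3)
    # a "batch" of one image, shaped (n_x, 1) like one training column
    single_image = np.random.rand(n_x, 1)  # stand-in for a real image
    probs = sess.run(y_pred, feed_dict={X: single_image})
    print('predicted class:', probs.argmax(axis=0)[0])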

How to switch from GradientDescent Optimizer to Adam in Tensorflow

My code is running perfectly with Gradient Descent, but I want to compare the effectiveness of my algorithm using Adam Optimizer, so I tried to modify the following code:
# Import MNIST data
#import input_data
#mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
#fashion_mnist = input_data.read_data_sets('data/fashion')
import tensorflow as tf

# Set parameters
learning_rate = 0.01 #1e-4
training_iteration = 30
batch_size = 100
display_step = 2

# TF graph input
x = tf.placeholder("float", [None, 784])  # mnist data image of shape 28*28=784
y = tf.placeholder("float", [None, 10])   # 0-9 digits recognition => 10 classes
#regularizer = tf.reduce_sum(tf.square(y))

# Create a model
# Set model weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

with tf.name_scope("Wx_b") as scope:
    # Construct a linear model
    model = tf.nn.softmax(tf.matmul(x, W) + b)  # Softmax

# Add summary ops to collect data
w_h = tf.summary.histogram("weights", W)
b_h = tf.summary.histogram("biases", b)

# More name scopes will clean up graph representation
with tf.name_scope("cost_function") as scope:
    # Minimize error using cross entropy
    cost_function = -tf.reduce_sum(y*tf.log(model))
    # Create a summary to monitor the cost function
    tf.summary.scalar("cost_function", cost_function)

with tf.name_scope("train") as scope:
    # Gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

# Initializing the variables
#init = tf.initialize_all_variables()
init = tf.global_variables_initializer()

# Merge all summaries into a single operator
merged_summary_op = tf.summary.merge_all()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter('/home/raed/Tensorflow/tensorflow_demo', graph_def=sess.graph_def)
    #writer.add_graph(sess.graph_def)
    # Training cycle
    for iteration in range(training_iteration):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute the average loss
            avg_cost += sess.run(cost_function, feed_dict={x: batch_xs, y: batch_ys})/total_batch
            # Write logs for each iteration
            summary_str = sess.run(merged_summary_op, feed_dict={x: batch_xs, y: batch_ys})
            summary_writer.add_summary(summary_str, iteration*total_batch + i)
        # Display logs per iteration step
        if iteration % display_step == 0:
            print("Iteration:" "%04d" % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Tuning completed!")
    # Test the model
    predictions = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(predictions, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
To use the Adam optimizer, I tried to change the following line:
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
and replace it with AdamOptimizer:
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost_function)
When I ran the code, it got through a few iterations and then stopped with the following error:
InvalidArgumentError (see above for traceback): Nan in summary histogram for: weights
[[Node: weights = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](weights/tag, Variable/read)]]
Could you please help me understand the problem? Thanks in advance.
The problem is that the weights are initialized to zero with W = tf.Variable(tf.zeros([784, 10])); that's why you get NaN in the weights.
You need to initialize them with some initializer, e.g. from a normal distribution, as follows:
W = tf.Variable(tf.random_normal([784, 10], stddev=0.35),
                name="weights")
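As a side note (my addition, not part of the original answer): the hand-rolled cross-entropy can also produce NaN through log(0) once a softmax output saturates, so a common extra guard is to clip the model output before taking the log:
# clip the softmax output away from exact zero so tf.log stays finite
cost_function = -tf.reduce_sum(y * tf.log(tf.clip_by_value(model, 1e-10, 1.0)))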

How to restore the learning rate in TF from a previously saved checkpoint?

I have stopped training at some point and saved the checkpoint, meta files, etc.
Now when I resume training, I want to start with the last running learning rate of the optimizer. Can you provide an example of doing so?
For those coming here (like me) wondering whether the last learning rate is automatically restored: tf.train.exponential_decay doesn't add any Variables to the graph; it only adds the operations necessary to derive the correct current learning rate value given a certain global_step value. This way, you only need to checkpoint the global_step value (which is normally done by default) and, assuming you keep the same initial learning rate, decay steps and decay factor, you'll automatically pick up training where you left off, with the correct learning rate value.
Inspecting the checkpoint won't show any learning_rate variable (or similar), simply because there is no need for any.
This example code learns to add two numbers:
import tensorflow as tf
import numpy as np
import os

save_ckpt_dir = './add_ckpt'
ckpt_filename = 'add.ckpt'
save_ckpt_path = os.path.join(save_ckpt_dir, ckpt_filename)
if not os.path.isdir(save_ckpt_dir):
    os.mkdir(save_ckpt_dir)

if any(fname.startswith("add.ckpt") for fname in os.listdir(save_ckpt_dir)):  # prefer to load pre-trained net
    load_ckpt_path = save_ckpt_path
else:
    load_ckpt_path = None  # train from scratch

def add_layer(inputs, in_size, out_size, activation_fn=None):
    Weights = tf.Variable(tf.ones([in_size, out_size]), name='Weights')
    biases = tf.Variable(tf.zeros([1, out_size]), name='biases')
    Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
    if activation_fn is None:
        layer_output = Wx_plus_b
    else:
        layer_output = activation_fn(Wx_plus_b)
    return layer_output

def produce_batch(batch_size=256):
    """Loads a single batch of data.
    Args:
        batch_size: The number of exercises in the batch.
    Returns:
        x : column vector of numbers
        y : another column of numbers
        xy_sum : the sum of the columns
    """
    x = np.random.random(size=[batch_size, 1]) * 10
    y = np.random.random(size=[batch_size, 1]) * 10
    xy_sum = x + y
    return x, y, xy_sum

with tf.name_scope("inputs"):
    xs = tf.placeholder(tf.float32, [None, 1])
    ys = tf.placeholder(tf.float32, [None, 1])

with tf.name_scope("correct_labels"):
    xysums = tf.placeholder(tf.float32, [None, 1])

with tf.name_scope("step_and_learning_rate"):
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(0.15, global_step, 10, 0.96)  # start lr=0.15, decay every 10 steps with a base of 0.96

with tf.name_scope("graph_body"):
    prediction = add_layer(tf.concat([xs, ys], 1), 2, 1, activation_fn=None)

with tf.name_scope("loss_and_train"):
    # the error between prediction and real data
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(xysums - prediction), reduction_indices=[1]))
    # Passing global_step to minimize() will increment it at each step.
    train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

with tf.name_scope("init_load_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    if load_ckpt_path:
        saver.restore(sess, load_ckpt_path)
    for i in range(1000):
        x, y, xy_sum = produce_batch(256)
        _, global_step_np, loss_np, lr_np = sess.run([train_step, global_step, loss, lr],
                                                     feed_dict={xs: x, ys: y, xysums: xy_sum})
        if global_step_np % 100 == 0:
            print("global step: {}, loss: {}, learning rate: {}".format(global_step_np, loss_np, lr_np))
    saver.save(sess, save_ckpt_path)
If you run it a few times, you will see the learning rate decrease. It also saves the global step. The trick is here:
with tf.name_scope("step_and_learning_rate"):
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(0.15, global_step, 10, 0.96)  # start lr=0.15, decay every 10 steps with a base of 0.96
...
train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
By default, saver.save will save all savable objects (including learning rate and global step). However, if tf.train.Saver is provided with var_list, saver.save will only save the vars included in var_list:
saver = tf.train.Saver(var_list = ..list of vars to save..)
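For instance (an illustration of mine, not from the original answer), passing only the trainable variables would exclude global_step, so the decayed learning rate would effectively start over after a restore:
# global_step is excluded from this var_list, so after restoring,
# exponential_decay starts again from step 0 at the initial learning rate
saver = tf.train.Saver(var_list=tf.trainable_variables())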
sources:
https://www.tensorflow.org/api_docs/python/tf/train/exponential_decay
https://stats.stackexchange.com/questions/200063/tensorflow-adam-optimizer-with-exponential-decay
https://www.tensorflow.org/api_docs/python/tf/train/Saver (see "saveable objects")

Tensorflow passing image to simple MNIST Data Model

I have a simple model for MNIST data classification with an accuracy of around 92%.
I would like to know whether there is any way I can provide an image of a digit and get the label predicted for that digit? The image can be from the MNIST test data rather than a custom image, just to avoid image preprocessing. Below is the code for my model.
Thanks
import tensorflow as tf

# reset graph
tf.reset_default_graph()

# constants
learning_rate = 0.5
batch_size = 100
training_epochs = 5
logs_path = "/tmp/mnist/2"

# load mnist data set
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, shape=[None, 784], name="image-input")
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name="labels-input")

# weights
with tf.name_scope("weights"):
    W = tf.Variable(tf.zeros([784, 10]))

# biases
with tf.name_scope("biases"):
    b = tf.Variable(tf.zeros([10]))

# Activation function softmax
with tf.name_scope("softmax"):
    # y is prediction
    y = tf.nn.softmax(tf.matmul(x, W) + b)

# Cost function
with tf.name_scope('cross_entropy'):
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))  # ????

# Define Optimizer
with tf.name_scope('train'):
    train_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

# Accuracy
with tf.name_scope('Accuracy'):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

tf.summary.scalar("cost", cross_entropy)
tf.summary.scalar("accuracy", accuracy)
# Merge all summaries into a single "operation" which will be executed in a session
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    # initialize variables before using them
    sess.run(tf.global_variables_initializer())
    # log writer object
    # writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
    # training cycles
    for epoch in range(training_epochs):
        # number of batches in one epoch
        batch_count = int(mnist.train.num_examples / batch_size)
        for i in range(batch_count):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, summary = sess.run([train_optimizer, summary_op], feed_dict={x: batch_x, y_: batch_y})
            writer.add_summary(summary, epoch * batch_count + i)
        if epoch % 5 == 0:
            print("Epoch: ", epoch)
    print("Accuracy: ", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    print("Done")
After you have trained the network, you can get the label that the network gives to a new image by doing
new_image_label= sess.run(y, feed_dict={x: new_image})
Note that the format of new_image should be the same as that of batch_x. Think of new_image as a batch of size 1, so if batch_x is 2D, new_image should also be 2D (of shape 1 by 784).
In addition, if you did some pre-processing (such as normalization) on the images in batch_x, you need to do the same to new_image.
You could also get the labels of several images simultaneously with the same code as above. Just replace new_image with a 2D array of several images, new_images.
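For example, a quick sketch that runs inside the training session above, after the training loop, using an image straight from the MNIST test set:
import numpy as np

# take one test image; slicing with 0:1 keeps it 2D (shape (1, 784)),
# i.e. a batch of size 1 in the same format as batch_x
new_image = mnist.test.images[0:1]
new_image_label = sess.run(y, feed_dict={x: new_image})
print("predicted digit:", np.argmax(new_image_label, axis=1)[0])
print("true digit:", np.argmax(mnist.test.labels[0:1], axis=1)[0])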