Tensorflow reporting wrong AUC

I have an issue using AUC from the TensorFlow library. I train my model (a convolutional neural network) batch by batch (I do not use a validation set), and after each epoch I evaluate it on an independent test set. The problem lies in the AUC evaluation.
For each training batch I calculate AUC/Accuracy/Loss/Precision/Recall/F1_score and then aggregate the mean of these scores. When I compute the same scores for the test set, every score except AUC comes out different. It doesn't seem right for the test loss to increase while the test AUC increases as well. And the real problem is that the test AUC is almost identical to the training AUC, even though their accuracy and loss are completely different.
with tf.name_scope("output"):
W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
predictions = tf.argmax(scores, 1, name="predictions")
l2_loss += tf.nn.l2_loss(W, name="l2_loss")
l2_loss += tf.nn.l2_loss(b, name="l2_loss")
tf.summary.histogram("l2", l2_loss)
tf.summary.histogram("weigths", W)
tf.summary.histogram("biases", b)
with tf.name_scope("auc_score"):
# labelOut = tf.argmax(y_place_holder, 1)
probability = tf.nn.softmax(scores)
# auc_scoreTemp = streaming_auc(y_place_holder, probability, curve="PR")
auc_scoreTemp = tf.metrics.auc(y_place_holder, probability, curve="PR")
auc_score = tf.reduce_mean(tf.cast(auc_scoreTemp, tf.float32), name="auc_score")
tf.summary.scalar("auc_score", auc_score)
with tf.name_scope("accuracy"):
labelOut = tf.argmax(y_place_holder, 1)
correct_prediction = tf.equal(predictions, tf.argmax(y_place_holder, 1), name="correct_prediction")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
tf.summary.scalar("accuracy", accuracy)
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
for batch in batches:
    x_batch, y_batch = list(zip(*batch))
    _, accuracy_train, auc_training, loss_train, prec_batch, recall_batch, f1_batch = sess.run(
        [train_step, accuracy, auc_score, cross_entropy, precision_mini_batch,
         recall_mini_batch, f1_score_min_batch],
        feed_dict={x_place_holder: x_batch,
                   y_place_holder: y_batch,
                   emb_place_holder: vocab_inv_emb_dset,
                   dropout_keep_prob: dropout_rate})
...
for test_batch in test_batches:
    auc_test = None
    x_test_batch, y_test_batch = list(zip(*test_batch))
    accuracy_test, loss_test, auc_test = sess.run(
        [accuracy, cross_entropy, auc_score],
        feed_dict={x_place_holder: x_test_batch,
                   y_place_holder: y_test_batch,
                   emb_place_holder: vocab_inv_emb_dset_val,
                   dropout_keep_prob: 1.0})
I also tried to use streaming_auc, which always returns 1.
EDIT
At the end of every epoch I reset the local variables by running:
sess.run(tf.local_variables_initializer())
But the first batch then produces really bad results. After the first batch I get normal results from the test set, which are not close to the training results. I don't know if this is the correct way to do it, but the results seem more realistic this way.

All of the tf.metrics functions return both a value op and an update op (see the tf.metrics documentation). You want to run the update op to accumulate values across batches, and then evaluate auc_score to retrieve the accumulated value, something like this:
...
auc_score, auc_op = tf.metrics.auc(y_place_holder, probability, curve="PR")
...
for batch in batches:
    sess.run([train_step, accuracy, auc_op, cross_entropy, ...])
...
py_auc = sess.run(auc_score)
EDIT -- toy example showing tf.metrics.auc and tf.contrib.metrics.streaming_auc
import tensorflow as tf
from tensorflow.contrib import metrics

batch_sz = 100
noise_mag = 0.5
nloop = 10

tf.set_random_seed(0)
batch_x = tf.random_uniform([batch_sz, 1], 0, 2, dtype=tf.int32)
noise = noise_mag * tf.random_normal([batch_sz, 1])
batch_y = tf.sigmoid(tf.to_float(batch_x) + noise)

auc_val, auc_accum = tf.metrics.auc(batch_x, batch_y)
# note: contrib.metrics.streaming_auc reverses labels, predictions
auc_val2, auc_accum2 = metrics.streaming_auc(batch_y, batch_x)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    for i in range(nloop):
        _ = sess.run([auc_accum, auc_accum2])
    auc, auc2 = sess.run([auc_val, auc_val2])
    print('Accumulated AUC = ', auc)    # 0.9238014
    print('Accumulated AUC2 = ', auc2)  # 0.9238014
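If you would rather reset only the AUC counters between epochs (instead of every local variable, as in the question's edit), a scoped reset can work; this is a sketch, and the scope string is an assumption about where tf.metrics.auc puts its counter variables, so adjust it to match your graph:
# Collect just the metric's counter variables (they are registered as
# local variables) and build a targeted reset op for them.
auc_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="auc")
reset_auc_op = tf.variables_initializer(auc_vars)

# At an epoch boundary, clear the accumulated counts before evaluating
# the test set, so training and test AUC no longer mix.
sess.run(reset_auc_op)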

Related

In tensorflow 1, when the loss function is defined with operations on Tensors, is the model really trained?

First, I'm sorry, but it's not possible to reproduce this problem in a few lines, as the model involved is a very complex network.
But here is an idea of the code:
def return_iterator(data, nb_epochs, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(data)
    dataset = dataset.repeat(nb_epochs).batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    yy = iterator.get_next()
    return tf.cast(yy, tf.float32)

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    y_pred = complex_model.autoencode(train)
    y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)
    nb_epochs = 10
    batch_size = 64
    y_real = return_iterator(train, nb_epochs, batch_size)
    y_pred = return_iterator(y_pred, nb_epochs, batch_size)
    res_equal = 1. - tf.reduce_mean(tf.abs(y_pred - y_real), [1, 2, 3])
    loss = 1 - tf.reduce_sum(res_equal, axis=0)
    opt = tf.train.AdamOptimizer().minimize(loss)
    tf.global_variables_initializer().run()
    for epoch in range(0, nb_epochs):
        _, d_loss = sess.run([opt, loss])
To define the loss I must use operations like tf.reduce_mean and tf.reduce_sum, and these operations only accept Tensors as input.
My question is: with this code, will the complex_model autoencoder actually be trained during training? (Even though here it is only used to produce the predictions that go into the loss.)
Thank you
P.S.: I am using TF 1.15 (and I cannot use another version).
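One way to check this directly (a diagnostic sketch, not from the original post; it assumes complex_model registers its weights as trainable variables) is to ask TensorFlow whether a gradient path connects the loss back to those weights:
# If a variable's gradient is None, the loss is not connected to it in the
# graph and minimize() will never update it.
grads = tf.gradients(loss, tf.trainable_variables())
connected = [v.name for v, g in zip(tf.trainable_variables(), grads) if g is not None]
print("variables that receive gradients from this loss:", connected)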

How to restore the learning rate in TF from a previously saved checkpoint?

I have stopped training at some point and saved the checkpoint, meta files, etc.
Now when I resume training, I want to start from the optimizer's last learning rate. Can you provide an example of doing so?
For those coming here (like me) wondering whether the last learning rate is automatically restored: tf.train.exponential_decay doesn't add any Variables to the graph; it only adds the operations necessary to derive the correct current learning rate value from a given global_step value. This way, you only need to checkpoint the global_step value (which is normally done by default) and, assuming you keep the same initial learning rate, decay steps and decay factor, you'll automatically pick up training where you left off, with the correct learning rate value.
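Concretely, the current rate is just a function of global_step; per the exponential_decay documentation it is computed as below (a sketch using the same constants as the example further down; the global_step value here is only an example):
# decayed_lr = initial_lr * decay_rate ** (global_step / decay_steps)
# (with staircase=True the exponent uses integer division)
initial_lr, decay_steps, decay_rate = 0.15, 10, 0.96
global_step = 500  # example value, as restored from a checkpoint
resumed_lr = initial_lr * decay_rate ** (global_step / decay_steps)
print(resumed_lr)  # the rate training will resume with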
Inspecting the checkpoint won't show any learning_rate variable (or similar), simply because there is no need for any.
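You can confirm that by listing what a checkpoint actually contains (a sketch; substitute your own checkpoint prefix for the path):
import tensorflow as tf

# Prints (name, shape) for everything stored in the checkpoint: you should
# see the model weights and global_step, but no learning-rate variable.
for name, shape in tf.train.list_variables('./add_ckpt/add.ckpt'):
    print(name, shape)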
This example code learns to add two numbers:
import tensorflow as tf
import numpy as np
import os

save_ckpt_dir = './add_ckpt'
ckpt_filename = 'add.ckpt'
save_ckpt_path = os.path.join(save_ckpt_dir, ckpt_filename)

if not os.path.isdir(save_ckpt_dir):
    os.mkdir(save_ckpt_dir)

if any(fname.startswith("add.ckpt") for fname in os.listdir(save_ckpt_dir)):  # prefer to load pre-trained net
    load_ckpt_path = save_ckpt_path
else:
    load_ckpt_path = None  # train from scratch


def add_layer(inputs, in_size, out_size, activation_fn=None):
    Weights = tf.Variable(tf.ones([in_size, out_size]), name='Weights')
    biases = tf.Variable(tf.zeros([1, out_size]), name='biases')
    Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
    if activation_fn is None:
        layer_output = Wx_plus_b
    else:
        layer_output = activation_fn(Wx_plus_b)
    return layer_output


def produce_batch(batch_size=256):
    """Loads a single batch of data.

    Args:
        batch_size: The number of exercises in the batch.

    Returns:
        x : column vector of numbers
        y : another column of numbers
        xy_sum : the sum of the columns
    """
    x = np.random.random(size=[batch_size, 1]) * 10
    y = np.random.random(size=[batch_size, 1]) * 10
    xy_sum = x + y
    return x, y, xy_sum


with tf.name_scope("inputs"):
    xs = tf.placeholder(tf.float32, [None, 1])
    ys = tf.placeholder(tf.float32, [None, 1])

with tf.name_scope("correct_labels"):
    xysums = tf.placeholder(tf.float32, [None, 1])

with tf.name_scope("step_and_learning_rate"):
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(0.15, global_step, 10, 0.96)  # start lr=0.15, decay every 10 steps with a base of 0.96

with tf.name_scope("graph_body"):
    prediction = add_layer(tf.concat([xs, ys], 1), 2, 1, activation_fn=None)

with tf.name_scope("loss_and_train"):
    # the error between prediction and real data
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(xysums - prediction), reduction_indices=[1]))
    # Passing global_step to minimize() will increment it at each step.
    train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

with tf.name_scope("init_load_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    if load_ckpt_path:
        saver.restore(sess, load_ckpt_path)
    for i in range(1000):
        x, y, xy_sum = produce_batch(256)
        _, global_step_np, loss_np, lr_np = sess.run([train_step, global_step, loss, lr],
                                                     feed_dict={xs: x, ys: y, xysums: xy_sum})
        if global_step_np % 100 == 0:
            print("global step: {}, loss: {}, learning rate: {}".format(global_step_np, loss_np, lr_np))
    saver.save(sess, save_ckpt_path)
If you run it a few times, you will see the learning rate decrease. It also saves the global step. The trick is here:
with tf.name_scope("step_and_learning_rate"):
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(0.15, global_step, 10, 0.96) # start lr=0.15, decay every 10 steps with a base of 0.96
...
train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
By default, saver.save will save all saveable objects, including the global step (from which the current learning rate is derived). However, if tf.train.Saver is given a var_list, saver.save will only save the variables included in var_list:
saver = tf.train.Saver(var_list = ..list of vars to save..)
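For instance (a sketch, not from the original answer), keeping the trainable weights plus the step counter would look like:
# Only the variables listed here end up in the checkpoint; anything else
# (e.g. optimizer slot variables) would have to be re-initialized on restore.
saver = tf.train.Saver(var_list=tf.trainable_variables() + [global_step])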
sources:
https://www.tensorflow.org/api_docs/python/tf/train/exponential_decay
https://stats.stackexchange.com/questions/200063/tensorflow-adam-optimizer-with-exponential-decay
https://www.tensorflow.org/api_docs/python/tf/train/Saver (see "saveable objects")

Massive diff. between Training and Testing accuracy: just overfitting or am I missing something obvious?

I am using Tensorflow to apply Convolution and Pooling techniques to a deep learning project that is unrelated to images and uses a simple numeric dataset as input. I realize it's a bit apples and oranges, but I quite like the approach of 'sliding a window' over a set of observations and extracting a set of features from it, even though the observations aren't exactly pixels, so I wanted to give it a try.
So far I am getting very disappointing Testing Accuracy, and I would have given up by now, except that the Training Accuracy and Loss seem to be performing reasonably well and respond in a sensible way to various adjustments in hyperparameters.
I am well aware of the possibility of overfitting, but could it really be so bad that I would achieve 90% accuracy on Training and remain stuck at 25% accuracy on Testing? My data has 4 classes, so 25% accuracy in testing is basically purely random outcomes. I am wondering if I'm just missing something completely obvious here?
I'm trying to analyze my Graph in TensorBoard and, as far as I can tell, I don't see anything wrong with the computation of Training and Testing accuracies. The only thing I don't understand is why the Training and Testing input queues are listed off to the side and don't seem to be connected to anything, but I can see from running the code and logging inside it that TF reads the appropriate training and testing data in batches.
My network is quite simple -- 1 convolution layer + 1 fully connected layer + 1 readout layer. Each input row has 480 columns and the idea is to keep it that way, instead of forming an AxB matrix. I then 'slide' a 30x1 window across this line with the given stride. I extract 50 features in the convolution layer and 100 features in the fully connected layer. I split the original dataset 80/20 for training and testing via sklearn.cross_validation.train_test_split.
Do I have too many degrees of freedom that the network simply overfits and memorizes the training data and remains useless for the testing data? Or am I not evaluating the Test Accuracy properly?
Testing Accuracy is calculated as follows:
dateLbl_batch, feature_batch, label_batch = sess.run([dateLblTest, featuresTest, labelsTest])
acc, summary = sess.run([accuracyTest, summary_test], feed_dict={X: feature_batch, Y_: label_batch})
i += 1
summary_writer.add_summary(summary, i)
Where accuracyTest is defined like this:
with tf.name_scope('AccuracyTest'):
    accuracyTest = tf.reduce_mean(tf.cast(tf.equal(
        tf.argmax(Y, 1),
        tf.argmax(Y_, 1)), tf.float32))
Here Y_ holds the labels loaded from the Test dataset, and Y is the output of the Readout Layer:
Y = tf.matmul(h_fc1, W_fc2, name='ReadOut_Layer') + b_fc2
Here's the relevant part of my code that puts it all together:
TS = 480
TL = 4

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W, sX, sY):
    return tf.nn.conv2d(x, W, strides=[1, sX, sY, 1], padding='SAME')

def pool_fn(x, kX, kY, sX, sY):
    return tf.nn.max_pool(x, ksize=[1, kX, kY, 1], strides=[1, sX, sY, 1], padding='SAME')

Y_ = tf.placeholder(tf.float32, [None, TL], name='pl_labels')
X = tf.placeholder(tf.float32, [None, TS], name='pl_x')

# Convolution Parameters
frame_x = 480
frame_y = 1
wnd_x = 30
wnd_y = 1
features_l1 = 50
features_lFC = 100
conv_stride_x = 1
conv_stride_y = 1
pool_krn_x = 2
pool_krn_y = 1
pool_stride_x = 2
pool_stride_y = 1
fc_x = int(frame_x / pool_krn_x)
fc_y = int(frame_y / pool_krn_y)

# 1st Layer
x_conv = tf.reshape(X, [-1, frame_x, frame_y, 1])
W_conv1 = weight_variable([wnd_x, wnd_y, 1, features_l1])
b_conv1 = bias_variable([features_l1])
h_conv1 = tf.nn.relu(conv2d(x_conv, W_conv1, conv_stride_x, conv_stride_y) + b_conv1)
h_pool1 = pool_fn(h_conv1, pool_krn_x, pool_krn_y, pool_stride_x, pool_stride_y)

# Fully Connected Layer
W_fc1 = weight_variable([fc_x * fc_y * features_l1, features_lFC])
b_fc1 = bias_variable([features_lFC])
h_pool2_flat = tf.reshape(h_pool1, [-1, fc_x * fc_y * features_l1])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Readout Layer
W_fc2 = weight_variable([features_lFC, TL])
b_fc2 = bias_variable([TL])
Y = tf.matmul(h_fc1, W_fc2, name='ReadOut_Layer') + b_fc2

dateLbl, features, labels = input_pipeline(fileNameTrain, batch_size, try_epochs)
dateLblTest, featuresTest, labelsTest = input_pipeline(fileNameTest, batch_size, 1)

with tf.name_scope('SoftMaxModel'):
    myModel = tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Y, name='SoftMaxModel')

with tf.name_scope('LossFn'):
    lossFn = tf.reduce_mean(myModel, name='LossFn')

with tf.name_scope('Optimizer'):
    train_step = tf.train.AdamOptimizer(1e-4, name='AdamConst').minimize(lossFn, name='MinimizeLossFn')

with tf.name_scope('AccuracyTrain'):
    accuracyTrain = tf.reduce_mean(tf.cast(tf.equal(
        tf.argmax(Y, 1),
        tf.argmax(Y_, 1)), tf.float32))

with tf.name_scope('AccuracyTest'):
    accuracyTest = tf.reduce_mean(tf.cast(tf.equal(
        tf.argmax(Y, 1),
        tf.argmax(Y_, 1)), tf.float32))

a1 = tf.summary.histogram("Model", myModel)
a2 = tf.summary.scalar("Loss", lossFn)
a3 = tf.summary.scalar("AccuracyTrain", accuracyTrain)
a4 = tf.summary.scalar("AccuracyTest", accuracyTest)

summary_train = tf.summary.merge([a1, a2, a3])
summary_test = tf.summary.merge([a4])

with tf.Session() as sess:
    summary_writer = tf.summary.FileWriter(logs_path, sess.graph)
    gInit = tf.global_variables_initializer().run()
    lInit = tf.local_variables_initializer().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    ############################ TRAINING ############################
    try:
        i = 0
        acCumTrain = 0
        while not coord.should_stop():
            dateLbl_batch, feature_batch, label_batch = sess.run([dateLbl, features, labels])
            _, acc, summary = sess.run([train_step, accuracyTrain, summary_train], feed_dict={X: feature_batch, Y_: label_batch})
            i += 1
            summary_writer.add_summary(summary, i)
            acCumTrain += acc
    except tf.errors.OutOfRangeError:
        acCumTrain /= i
        print('-------------- Finished Training ---------------')
    finally:
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    ############################ TESTING ############################
    try:
        i = 0
        acCumTest = 0
        while not coord.should_stop():
            dateLbl_batch, feature_batch, label_batch = sess.run([dateLblTest, featuresTest, labelsTest])
            acc, summary = sess.run([accuracyTest, summary_test], feed_dict={X: feature_batch, Y_: label_batch})
            i += 1
            summary_writer.add_summary(summary, i)
            acCumTest += acc
    except tf.errors.OutOfRangeError:
        acCumTest /= i
        print('-------------- Finished Testing ---------------')
    finally:
        coord.request_stop()

    print('Training Accuracy: {:.2f} Testing Accuracy: {:.2f}'.format(acCumTrain, acCumTest))
    coord.join(threads)
Here are the screenshots of training and testing accuracy from TensorBoard -- looks promising in Training, but just random noise in Testing!
Here is the overall Graph:
And here is the zoom of the Graph that shows how Training and Testing Accuracy are calculated:
Looks like overfitting to me. Try adding dropout and weight decay and see what changes. Maybe even increase the capacity, while applying regularization, by adding more layers. If this does not help, take a look at the data; maybe your training and test sets are too different. Then augmentation might help.
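A sketch of those two changes using the variable names from the question (the keep-probability and the decay factor are arbitrary, untuned values):
# Dropout between the fully connected layer and the readout layer; feed
# keep_prob < 1.0 during training and 1.0 during testing.
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
Y = tf.matmul(h_fc1_drop, W_fc2, name='ReadOut_Layer') + b_fc2

# L2 weight decay added on top of the existing cross-entropy loss.
l2 = 1e-4 * (tf.nn.l2_loss(W_conv1) + tf.nn.l2_loss(W_fc1) + tf.nn.l2_loss(W_fc2))
lossFn = tf.reduce_mean(myModel, name='LossFn') + l2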
The numbers you give in the comments make me think that it's definitely overfitting. For example, MNIST has twice as many features as you but 35 times as many training examples.
I think it's also useful to get a sense for how many weights (parameters) you have -- that surprised me when I started using CNNs. Your first conv layer has 30*50 features = 1500, which isn't so bad. But then you have 50*480/2=12000 INPUTS to your fully-connected layer, which turn into 48000 features in your FC. That's a pretty big matrix for the size of your problem.
You could actually make your parameter space a lot smaller by adding a conv-layer or two, along with pooling layers.
Other ways to make your parameter space smaller: decrease the number of conv filters from 50 to 10, or use dimension-reduction techniques like PCA.
Finally, don't forget that there's actually a conv1d function that suits your use case much better.
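For reference, a minimal sketch of the 1-D formulation (using tf.layers.conv1d; the sizes mirror the question's setup, and the padding/activation choices are illustrative):
# Treat each 480-column row as a length-480 sequence with a single channel.
x_seq = tf.reshape(X, [-1, 480, 1])

# 50 filters of width 30 sliding along the 480 columns.
h_conv1 = tf.layers.conv1d(x_seq, filters=50, kernel_size=30,
                           strides=1, padding='same', activation=tf.nn.relu)

# 1-D max pooling halves the sequence length (480 -> 240).
h_pool1 = tf.layers.max_pooling1d(h_conv1, pool_size=2, strides=2, padding='same')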

Simple model gets 0.0 accuracy

I am training a simple model on a dataset whose labels are always equal to 0, and I am getting 0.0 accuracy.
The model is the following:
import csv
import numpy as np
import pandas as pd
import tensorflow as tf
labelsReader = pd.read_csv('data.csv',usecols = [12],header=None)
dataReader = pd.read_csv('data.csv',usecols = [1,2,3,4,5,6,7,8,9,10,11],header=None)
labels_ = labelsReader.values
data_ = dataReader.values
labels = np.float32(labels_)
data = np.float32(data_)
x = tf.placeholder(tf.float32, [None, 11])
W = tf.Variable(tf.truncated_normal([11, 1], stddev=1./11.))
b = tf.Variable(tf.zeros([1]))
y = tf.matmul(x, W) + b
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 1])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for i in range(0, 1000):
    train_step.run(feed_dict={x: data, y_: labels})
correct_prediction = tf.equal(y, y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: data, y_: labels}))
And here is the dataset:
444444,0,0,0.9993089149965446,0,0,0.000691085003455425,0,0,0,0,0,0
As the model trains, the predicted y for the row shown above keeps decreasing and reaches -1000 after 1000 iterations.
What could be the cause of the model's failure to train?
Your accuracy check tests whether the predicted float is exactly equal to the value you expect. With the network you made, this is a very difficult task (although you might have a chance, since you are also overfitting your data).
To get better results:
- Define accuracy as the prediction being above/below a threshold (closer to 1 or closer to 0), as sketched after this list.
- Normalise your input data. I don't know the range of your input, but 444444 is a ridiculous value to feed in, and it is difficult to train weights that can handle values like that.
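A minimal sketch of the thresholded accuracy (this assumes the single output y is squashed to a probability with a sigmoid, which would also mean using sigmoid cross-entropy rather than softmax for the loss; the 0.5 cut-off is arbitrary):
# Count a prediction as correct when it lands on the same side of 0.5 as
# the label, instead of requiring exact float equality.
predicted_class = tf.cast(tf.greater(tf.sigmoid(y), 0.5), tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_class, y_), tf.float32))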
Also, try adding some sanity checks. For example: what output is your model actually predicting? (y.eval(feed_dict={x: data})) And what is the cross-entropy during training? (sess.run([accuracy, cross_entropy], feed_dict={x: data, y_: labels}))
Good luck!

Add a summary of accuracy of the whole train/test dataset in Tensorflow

I am trying to use TensorBoard to visualize my training procedure. My goal is: when every epoch completes, I would like to test the network's accuracy using the whole validation dataset and store that accuracy result in a summary file, so that I can visualize it in TensorBoard.
I know TensorFlow has a summary_op for this; however, it seems to only work for one batch when running sess.run(summary_op). I need to calculate the accuracy for the whole dataset. How?
Is there any example of how to do it?
Define a tf.scalar_summary that accepts a placeholder:
accuracy_value_ = tf.placeholder(tf.float32, shape=())
accuracy_summary = tf.scalar_summary('accuracy', accuracy_value_)
Then calculate the accuracy over the whole dataset (define a routine that computes the accuracy for every batch in the dataset and takes the mean of those values) and save it into a Python variable, let's call it va.
Once you have the value of va, just run the accuracy_summary op, feeding the accuracy_value_ placeholder:
sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
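A sketch of that routine (the batch iterator, placeholders and accuracy op below stand in for whatever your own graph defines; the plain mean is exact only if all batches have the same size):
# Average per-batch accuracy over the whole validation set, then push the
# Python float through the placeholder-backed summary defined above.
batch_accuracies = []
for batch_x, batch_y in validation_batches:   # hypothetical batch iterator
    batch_accuracies.append(
        sess.run(accuracy, feed_dict={x: batch_x, y_: batch_y}))
va = sum(batch_accuracies) / len(batch_accuracies)

summary = sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
summary_writer.add_summary(summary, epoch)    # hypothetical writer and step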
I implemented a naive one-layer model as an example to classify the MNIST dataset and visualize validation accuracy in TensorBoard; it works for me.
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import os

# number of epochs
num_epoch = 1000
model_dir = '/tmp/tf/onelayer_model/accu_info'
# mnist dataset location, change if you need
data_dir = '../data/mnist'

# load MNIST dataset without one hot
dataset = read_data_sets(data_dir, one_hot=False)

# Create placeholder for input images X and labels y
X = tf.placeholder(tf.float32, [None, 784])
# one_hot = False
y = tf.placeholder(tf.int32)

# One layer model graph
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.nn.relu(tf.matmul(X, W) + b)

init = tf.initialize_all_variables()

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)
# loss function
loss = tf.reduce_mean(cross_entropy)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

_, top_1_op = tf.nn.top_k(logits)
top_1 = tf.reshape(top_1_op, shape=[-1])
correct_classification = tf.cast(tf.equal(top_1, y), tf.float32)
# accuracy function
acc = tf.reduce_mean(correct_classification)

# define info that is used in SummaryWriter
acc_summary = tf.scalar_summary('valid_accuracy', acc)
valid_summary_op = tf.merge_summary([acc_summary])

with tf.Session() as sess:
    # initialize all the variables
    sess.run(init)
    print("Writing Summaries to %s" % model_dir)
    train_summary_writer = tf.train.SummaryWriter(model_dir, sess.graph)

    # load validation dataset
    valid_x = dataset.validation.images
    valid_y = dataset.validation.labels

    for epoch in xrange(num_epoch):
        batch_x, batch_y = dataset.train.next_batch(100)
        feed_dict = {X: batch_x, y: batch_y}
        _, acc_value, loss_value = sess.run(
            [train_op, acc, loss], feed_dict=feed_dict)

        vsummary = sess.run(valid_summary_op,
                            feed_dict={X: valid_x,
                                       y: valid_y})

        # Write validation accuracy summary
        train_summary_writer.add_summary(vsummary, epoch)
Batching your validation set is possible if you are using tf.metrics ops, which use internal counters. Here is a simplified example:
model = create_model()
tf.summary.scalar('cost', model.cost_op)
acc_value_op, acc_update_op = tf.metrics.accuracy(labels, predictions)

summary_common = tf.summary.merge_all()

summary_valid = tf.summary.merge([
    tf.summary.scalar('accuracy', acc_value_op),
    # other metrics here...
])

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(logs_path + '/train',
                                         sess.graph)
    valid_writer = tf.summary.FileWriter(logs_path + '/valid')
While training, only write the common summary using your train-writer:
summary = sess.run(summary_common)
train_writer.add_summary(summary, tf.train.global_step(sess, gstep_op))
train_writer.flush()
After every validation, write both summaries using the valid-writer:
gstep, summaryc, summaryv = sess.run([gstep_op, summary_common, summary_valid])
valid_writer.add_summary(summaryc, gstep)
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()
When using tf.metrics, don't forget to reset the internal counters (local variables) before every validation step.
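A sketch of that reset (it assumes the metric counters are the only local variables you care about; otherwise collect them by scope with tf.get_collection instead):
# Metric counters created by tf.metrics.* live in the LOCAL_VARIABLES
# collection; re-initializing them zeroes the running totals.
reset_metrics_op = tf.variables_initializer(tf.local_variables())

# Run this right before each validation pass, then call acc_update_op once
# per validation batch and finally fetch summary_valid as shown above.
sess.run(reset_metrics_op)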