Network bug - Inception v1 isn't training - tensorflow

I am trying to use the Inception model (GoogLeNet) from this link https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v1.py, which is implemented by Google using the TensorFlow Slim API, to classify images from the CIFAR-10 dataset (dataset link https://www.cs.toronto.edu/~kriz/cifar.html). The problem is that the network cost stays almost constant and I can't find the bug. I am very new to tensorflow and slim, so I would really appreciate any help.
I am using these packages:
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import ops
import matplotlib.pyplot as plt
import os
import pickle
import cv2
from sklearn import model_selection as ms
from nets import inception_v1,inception_utils
import math
%matplotlib inline
And I made these two functions:
def one_hot_matrix(labels, C):
    C = tf.constant(C, name='C')
    one_hot_matrix = tf.one_hot(labels, C, axis=0)
    sess = tf.Session()
    one_hot = sess.run(one_hot_matrix)
    sess.close()
    return one_hot
def make_mini_batches(X, Y, mini_batch_size):
    m = X.shape[0]
    mini_batches = []
    # number of mini batches of size mini_batch_size in the dataset
    num_complete_minibatches = math.floor(m/mini_batch_size)
    for k in range(0, num_complete_minibatches):
        mini_batch_X = X[k*mini_batch_size : (k+1)*mini_batch_size, ...]
        mini_batch_Y = Y[k*mini_batch_size : (k+1)*mini_batch_size, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = X[num_complete_minibatches*mini_batch_size:, ...]
        mini_batch_Y = Y[num_complete_minibatches*mini_batch_size:, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
First, I am reading the dataset:
# function to read the batches
def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
    with open(cifar10_dataset_folder_path + '/data_batch_' + str(batch_id), mode='rb') as file:
        # note the encoding type is 'latin1'
        batch = pickle.load(file, encoding='latin1')
    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']
    datadict = {'data': features, 'labels': labels}
    return datadict

# combine batches into one dataset (batch size: 10000)
full_data = load_cfar10_batch('./cifar_10', 1)['data']
full_labels = []
for i in range(5):
    full_labels.extend(load_cfar10_batch('./cifar_10', i+1)['labels'])
    if i > 0:
        full_data = np.concatenate((full_data, load_cfar10_batch('./cifar_10', i+1)['data']), axis=0)

# dataset sizes
full_data.shape, len(full_labels)
Followed by some preprocessing and train/validation split:
# data preprocessing (using only 1/10 of the dataset for speed)
X = full_data[0:5000]
y = one_hot_matrix(full_labels[0:5000], 10).T
# split into training-validation sets
x_train, x_val, y_train, y_val = ms.train_test_split(X, y, test_size=0.2, random_state=1)
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_train = x_train / 255.0
x_val = x_val / 255.0
print('x_train shape:',x_train.shape)
print('y_train shape:',y_train.shape)
print('x_val shape:',x_val.shape)
print('y_val shape:',y_val.shape)
Then I initialize the variables:
tf.set_random_seed(1)
seed = 3
(m, n_H, n_W, n_C) = x_train.shape
n_y = y_train.shape[1]
costs = []
print_cost = True
learning_rate = 0.001
num_epochs = 100
minibatch_size = 256
num_minibatches = int(m / minibatch_size)
minibatches = make_mini_batches(x_train, y_train, minibatch_size)
ops.reset_default_graph()
inputs = tf.placeholder(tf.float32,shape=[None, n_H, n_W, n_C],name = 'inputs')
labels = tf.placeholder(tf.int8,shape=[None, n_y],name = 'labels')
# Forward propagation (Inception)
Z = inception_v1.inception_v1(inputs,num_classes = n_y,dropout_keep_prob=1,global_pool=True)[1]['Logits']
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = Z, labels = labels))
# ADAM optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# Initialize variables
init = tf.global_variables_initializer()
And then the training loop:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # learning rate decay
        if epoch % 8 == 0:
            learning_rate *= math.pow(0.95, epoch/8)
        minibatch_cost = 0.
        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            _, temp_cost = sess.run([optimizer, cost], feed_dict={inputs: minibatch_X, labels: minibatch_Y})
            minibatch_cost += temp_cost / num_minibatches
        # Print the cost every epoch
        if print_cost == True and epoch % 5 == 0:
            print("Cost after epoch %i: %f" % (epoch, minibatch_cost), ", Learning rate: %f" % (learning_rate))
        if print_cost == True and epoch % 1 == 0:
            costs.append(minibatch_cost)

    # Plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('Cost')
    plt.xlabel('Iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    # Calculate the correct predictions
    predict_op = tf.argmax(Z, 1)
    correct_prediction = tf.equal(predict_op, tf.argmax(labels, 1))

    # Calculate accuracy on the validation set
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    #print(accuracy)
    train_accuracy = accuracy.eval({inputs: x_train, labels: y_train})
    val_accuracy = accuracy.eval({inputs: x_val, labels: y_val})
    print("Train Accuracy:", train_accuracy)
    print("Validation Accuracy:", val_accuracy)
The output looks like this:
Cost after epoch 0: 2.455999 , Learning rate: 0.001000
Cost after epoch 5: 2.454697 , Learning rate: 0.001000
Cost after epoch 10: 2.454670 , Learning rate: 0.000950
Cost after epoch 15: 2.454655 , Learning rate: 0.000950
Cost after epoch 20: 2.454650 , Learning rate: 0.000857
Cost after epoch 25: 2.454649 , Learning rate: 0.000735
Cost after epoch 30: 2.454659 , Learning rate: 0.000735
Cost after epoch 35: 2.454643 , Learning rate: 0.000599
Cost after epoch 40: 2.454627 , Learning rate: 0.000463
...
...
And so my network is not training.

I managed to find the solution: I had to wrap the call to the network in Inception's argument scope, like this:
with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
    Z = inception_v1.inception_v1(inputs, num_classes=n_y, dropout_keep_prob=1, global_pool=True)[1]['Logits']
After that, everything works just fine.
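For completeness, the graph-construction part looks roughly like this after the fix (a sketch reusing the placeholders and hyperparameters from above; slim here is tensorflow.contrib.slim, and the comment about defaults reflects my understanding of what the arg scope provides):

import tensorflow.contrib.slim as slim

# Build the logits inside Inception's argument scope so the Slim layers pick
# up the defaults the implementation expects (weight regularization,
# batch norm parameters, etc.).
with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
    Z = inception_v1.inception_v1(inputs, num_classes=n_y,
                                  dropout_keep_prob=1,
                                  global_pool=True)[1]['Logits']

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=Z, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()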

Related

Training/Test data in tensorflow example

I'm new to tensorflow and I'm following the example here, but I have one question.
The code is as follows:
import numpy as np
import tensorflow as tf
from time import time
import math

from include.data import get_data_set
from include.model import model, lr

train_x, train_y = get_data_set("train")
test_x, test_y = get_data_set("test")
x, y, output, y_pred_cls, global_step, learning_rate = model()
global_accuracy = 0

# PARAMS
_BATCH_SIZE = 128
_EPOCH = 60
_SAVE_PATH = "./tensorboard/cifar-10-v1.0.0/"

# LOSS AND OPTIMIZER
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-08).minimize(loss, global_step=global_step)

# PREDICTION AND ACCURACY CALCULATION
correct_prediction = tf.equal(y_pred_cls, tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# SAVER
merged = tf.summary.merge_all()
saver = tf.train.Saver()
sess = tf.Session()
train_writer = tf.summary.FileWriter(_SAVE_PATH, sess.graph)

try:
    print("\nTrying to restore last checkpoint ...")
    last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=_SAVE_PATH)
    saver.restore(sess, save_path=last_chk_path)
    print("Restored checkpoint from:", last_chk_path)
except ValueError:
    print("\nFailed to restore checkpoint. Initializing variables instead.")
    sess.run(tf.global_variables_initializer())


def train(epoch):
    batch_size = int(math.ceil(len(train_x) / _BATCH_SIZE))
    i_global = 0

    for s in range(batch_size):
        batch_xs = train_x[s*_BATCH_SIZE: (s+1)*_BATCH_SIZE]
        batch_ys = train_y[s*_BATCH_SIZE: (s+1)*_BATCH_SIZE]

        start_time = time()
        i_global, _, batch_loss, batch_acc = sess.run(
            [global_step, optimizer, loss, accuracy],
            feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)})
        duration = time() - start_time

        if s % 10 == 0:
            percentage = int(round((s/batch_size)*100))
            bar_len = 29
            filled_len = int((bar_len*int(percentage))/100)
            bar = '=' * filled_len + '>' + '-' * (bar_len - filled_len)
            msg = "Global step: {:>5} - [{}] {:>3}% - acc: {:.4f} - loss: {:.4f} - {:.1f} sample/sec"
            print(msg.format(i_global, bar, percentage, batch_acc, batch_loss, _BATCH_SIZE / duration))

    test_and_save(i_global, epoch)


def test_and_save(_global_step, epoch):
    global global_accuracy

    i = 0
    predicted_class = np.zeros(shape=len(test_x), dtype=np.int)
    while i < len(test_x):
        j = min(i + _BATCH_SIZE, len(test_x))
        batch_xs = test_x[i:j, :]
        batch_ys = test_y[i:j, :]
        predicted_class[i:j] = sess.run(
            y_pred_cls,
            feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)}
        )
        i = j

    correct = (np.argmax(test_y, axis=1) == predicted_class)
    acc = correct.mean()*100
    correct_numbers = correct.sum()

    mes = "\nEpoch {} - accuracy: {:.2f}% ({}/{})"
    print(mes.format((epoch+1), acc, correct_numbers, len(test_x)))

    if global_accuracy != 0 and global_accuracy < acc:
        summary = tf.Summary(value=[
            tf.Summary.Value(tag="Accuracy/test", simple_value=acc),
        ])
        train_writer.add_summary(summary, _global_step)
        saver.save(sess, save_path=_SAVE_PATH, global_step=_global_step)
        mes = "This epoch receive better accuracy: {:.2f} > {:.2f}. Saving session..."
        print(mes.format(acc, global_accuracy))
        global_accuracy = acc
    elif global_accuracy == 0:
        global_accuracy = acc

    print("###########################################################################################################")


def main():
    for i in range(_EPOCH):
        print("\nEpoch: {0}/{1}\n".format((i+1), _EPOCH))
        train(i)


if __name__ == "__main__":
    main()


sess.close()
In this example, I think, both the test and training data are fed to the network, while normally only the training data should be fed to the network. I can't see any real difference between the train() and test_and_save() functions. Am I wrong? Thanks
Here is an explanation, if I understood your question correctly. The train function is called every epoch and iterates through the training data. At the end of the epoch, the test_and_save function is called, where the model's accuracy is evaluated: it iterates through the test data using the learned weights, calculates the accuracy, and saves the model. This is repeated _EPOCH times.
Edit: The model is saved in the test_and_save function. However, the weights are only updated (gradients computed and applied) when the optimizer op is passed to sess.run() in the train function. In test_and_save, the test data is fed to the network, but only y_pred_cls is evaluated via sess.run(), so no weights are changed there.
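In other words, only a run() call that fetches the optimizer op actually trains the model. A minimal sketch using the names from the code above (sess, optimizer, loss, y_pred_cls, x, y):

# train(): the optimizer op is in the fetch list, so gradients are applied
# and the weights change.
_, batch_loss = sess.run([optimizer, loss],
                         feed_dict={x: batch_xs, y: batch_ys})

# test_and_save(): a pure forward pass; fetching y_pred_cls computes
# predictions from the current weights but does not update them.
predictions = sess.run(y_pred_cls, feed_dict={x: batch_xs})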

Implement TFlearn imdb lstm example by tensorflow

I'm re-implementing tflearn's LSTM IMDB example directly in tensorflow.
I used the same dataset, architecture and hyper-parameters (embedding size, max sentence length, and so on) as the tflearn model does, but my model's performance is worse than the tflearn example's (after 10 epochs, my model gets about 52% accuracy while the example gets nearly 80%).
I'd appreciate it a lot if you could give me some advice on how to reach the performance of the example.
Below is my code:
import tensorflow as tf
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
from tensorflow.contrib.rnn import BasicLSTMCell
import time
n_class = 2
n_words = 10000
EMBEDDING_SIZE = 128
HIDDEN_SIZE = 128
MAX_LENGTH = 100
lr = 1e-3
epoch = 10
TRAIN_SIZE = 22500
validation_size = 2500
batch_size = 128
KP = 0.8
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=n_words,
                                valid_portion=0.1, sort_by_len=False)
trainX, trainY = train
validationX, validationY = test
testX, testY = _
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=MAX_LENGTH, value=0.)
validationX = pad_sequences(validationX, maxlen=MAX_LENGTH, value=0.)
testX = pad_sequences(testX, maxlen=MAX_LENGTH, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, n_class)
validationY = to_categorical(validationY, n_class)
testY = to_categorical(testY, n_class)
graph = tf.Graph()
with graph.as_default():
    # input
    text = tf.placeholder(tf.int32, [None, MAX_LENGTH])
    labels = tf.placeholder(tf.float32, [None, n_class])
    keep_prob = tf.placeholder(tf.float32)

    embeddings_var = tf.Variable(tf.truncated_normal([n_words, EMBEDDING_SIZE]), trainable=True)
    text_embedded = tf.nn.embedding_lookup(embeddings_var, text)
    print(text_embedded.shape)  # [batch_size, length, embedding_size]

    word_list = tf.unstack(text_embedded, axis=1)
    cell = BasicLSTMCell(HIDDEN_SIZE)
    dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    outputs, encoding = tf.nn.static_rnn(dropout_cell, word_list, dtype=tf.float32)
    logits = tf.layers.dense(outputs[-1], n_class, activation=None)

    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
    prediction = tf.argmax(logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(labels, 1)), tf.float32))

train_steps = epoch * TRAIN_SIZE // batch_size + 1
print("Train steps: ", train_steps)

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    print("Initialized!")
    s = time.time()
    offset = 0
    for step in range(train_steps):
        offset = (offset * step) % (TRAIN_SIZE - batch_size)
        batch_text = trainX[offset: offset + batch_size, :]
        batch_label = trainY[offset: offset + batch_size, :]
        fd = {text: batch_text, labels: batch_label, keep_prob: KP}
        _, l, acc = sess.run([optimizer, loss, accuracy], feed_dict=fd)
        if step % 100 == 0:
            print("Step: %d loss: %f accuracy: %f" % (step, l, acc))
        if step % 500 == 0:
            v_l, v_acc = sess.run([loss, accuracy], feed_dict={
                text: validationX,
                labels: validationY,
                keep_prob: 1.0
            })
            print("------------------------------------------------")
            print("Validation: step: %d loss: %f accuracy: %f" % (step, v_l, v_acc))
            print("------------------------------------------------")

    print("Training finished, time consumed:", time.time() - s, " s")
    print("Test accuracy: %f" % accuracy.eval(feed_dict={
        text: testX,
        labels: testY,
        keep_prob: 1.0
    }))
Sorry, I made a stupid mistake!
The loss:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
is supposed to be
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
With that change, the accuracy matches the tflearn example.
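The difference matters because the labels here are one-hot over two mutually exclusive classes: softmax_cross_entropy_with_logits normalizes the two logits into a single probability distribution, while sigmoid_cross_entropy_with_logits scores each output as an independent binary decision. A tiny standalone comparison (the numbers are illustrative and not from the original run):

import tensorflow as tf

# One sample, two mutually exclusive classes, one-hot label.
logits = tf.constant([[2.0, -1.0]])
labels = tf.constant([[1.0, 0.0]])

softmax_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
sigmoid_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

with tf.Session() as sess:
    # Same logits, different losses: softmax couples the two outputs,
    # sigmoid treats them separately.
    print(sess.run([softmax_loss, sigmoid_loss]))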

Tensorflow does not train CIFAR-100 data

I am trying to build a linear classifier on CIFAR-100 using TensorFlow. I took the code from Martin Gorner's MNIST tutorial and changed it a bit. When I run this code, tensorflow does not train: the code runs, but the accuracy remains 1.0 and the loss (cross entropy) remains at 4605.17. I don't know what is wrong; I am actually a newbie to TF, so any help is appreciated.
import pickle
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.framework import tensor_util
import math
#imports data
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
cifar100_test = {}
cifar100_train = {}
labelMap = {}
labelNames = {}
# Load the raw CIFAR-10 data.
cifar100_test = unpickle('dataset/cifar-100-python/test')
cifar100_train = unpickle('dataset/cifar-100-python/train')
labelMap = unpickle('dataset/cifar-100-python/meta')
#tr for training data and te for testing data, X is data, Y is label
Xtr = cifar100_train[b'data']
Yr = cifar100_train[b'fine_labels']
Xte = cifar100_test[b'data']
Ye = cifar100_test[b'fine_labels']
classNames = labelMap[b'fine_label_names']
num_train = Xtr.shape[0]
num_test = Xte.shape[0]
num_class = len(classNames)
Ytr = np.zeros([num_train, num_class])
Yte = np.zeros([num_test, num_class])
Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1
# As a sanity check, we print out the size of the training and test data.
print('Train data shape:', Xtr.shape)
print('Train Label shape:', Ytr.shape)
print('Test data shape:', Xte.shape)
print('Test Label shape:', Yte.shape)
print('Name of Predicted Class:', classNames[0]) #indice of the label name is the indice of the class.
Xtrain = Xtr#[:1000]
Xtest = Xte#[:100]
Ytrain = Ytr#[:1000]
Ytest = Yte#[:100]
print('Train data shape:', Xtrain.shape)
print('Train Label shape:', Ytrain.shape)
print('Test data shape:', Xtest.shape)
print('Test Label shape:', Ytest.shape)
Xtrain = np.reshape(Xtrain,(50000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
Xtest = np.reshape(Xtest,(10000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
Xbatches = np.split(Xtrain, 500); #second number is # of batches
Ybatches = np.split(np.asarray(Ytrain), 500);
XtestB = np.split(Xtest, 100);
YtestB = np.split(Ytest, 100);
print('X # of batches:', len(Xbatches))
print('Y # of batches:', len(Ybatches))
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [100, 32, 32, 3])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [100, 100])
# weights W[784, 10] 784=28*28
W = tf.Variable(tf.zeros([3072, 100]))
# biases b[10]
b = tf.Variable(tf.zeros([100]))
# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 3072])
# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)
# loss function: cross-entropy = - sum( Y_i * log(Yi) )
# Y: the computed output vector
# Y_: the desired output vector
# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean will add all the components in the tensor
# so here we end up with the total cross-entropy for all images in the batch
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0 # normalized for batches of 100 images,
# *10 because "mean" included an unwanted division by 10
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(500):
    # the backpropagation training step
    t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
    print(Loss)
    print(i)

for i in range(100):
    print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i], Y_: YtestB[i]}))
You compute the accuracy a hundred times after the training process is completed. Nothing will change there. You should place your print('accuracy:'....) within the for loop in which you perform the backpropagation:
for i in range(500):
    # the backpropagation training step
    t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
    print(Loss)
    print(i)
    # there are only 100 test batches, so cycle through them
    print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i % len(XtestB)], Y_: YtestB[i % len(YtestB)]}))
Sorry for the post, it turns out it was a basic mistake.
I changed the following:
Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1
with
Ytr[range(num_train), Yr_temp[range(num_train)]] = 1
Yte[range(num_test), Ye_temp[range(num_test)]] = 1
The first version sets all the values to 1, but I just wanted to set the index of the true class to 1 and the other elements to 0. Thanks for your time.
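For anyone else hitting this, the difference is how NumPy treats a slice combined with an index array versus two paired index arrays. A small standalone illustration with made-up labels (not the CIFAR data):

import numpy as np

# 3 samples, 4 classes, made-up integer labels.
y = np.array([2, 0, 3])
num_samples, num_classes = 3, 4

# Slice + index array: the slice keeps ALL rows, so every row gets a 1 in
# every column that appears in y -- not a one-hot encoding.
wrong = np.zeros((num_samples, num_classes))
wrong[0:num_samples, y[0:num_samples]] = 1
# wrong -> [[1. 0. 1. 1.]
#           [1. 0. 1. 1.]
#           [1. 0. 1. 1.]]

# Two index arrays are paired element-wise: row i gets a 1 only in column y[i].
right = np.zeros((num_samples, num_classes))
right[range(num_samples), y[range(num_samples)]] = 1
# right -> [[0. 0. 1. 0.]
#           [1. 0. 0. 0.]
#           [0. 0. 0. 1.]]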

How to implement metrics learning using siamese neural network in Tensorflow

I'm trying to implement metric learning using a contrastive loss, like in the Caffe example, and plot the results like in the example:
(source: researchgate.net)
I tried using simple fully connected layers in Caffe and it works well (result as in the picture above),
but in Tensorflow I get a different result.
Could anyone help me find the issue in my code, or suggest how to implement the Caffe example in Tensorflow?
Here is my code:
# In[1]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.examples.tutorials.mnist import input_data
from math import sqrt
import numpy as np
from sklearn.manifold import TSNE
get_ipython().magic('matplotlib inline')
get_ipython().magic('pylab inline')
# In[2]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
# In[3]:
learning_rate = 0.00001
training_epochs = 15
batch_size = 100
display_step = 1
logs_path = './tensorflow_logs/mnist_metrics'
# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 28*28 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
margin = 1.0
# In[4]:
x_left = tf.placeholder(tf.float32, shape=[None, n_input], name='InputDataLeft')
x_right = tf.placeholder(tf.float32, shape=[None, n_input], name='InputDataRight')
label = tf.placeholder(tf.float32, shape=[None, 1], name='LabelData') # 0 if the same, 1 is different
x_image_left = x_left
x_image_right = x_right
# In[5]:
# def NN(inputs):
# In[6]:
def tfNN(x, weights, biases):
    x = tf.scalar_mul(1.0/256.0, x)
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
    out_layer = tf.add(tf.matmul(layer_3, weights['w4']), biases['b4'])
    return out_layer

# In[7]:
# Store layers weight & bias
weights = {
    'w1': tf.Variable(tf.random_uniform([n_input, n_hidden_1], minval=-4*np.sqrt(6.0/(n_input + n_hidden_1)), maxval=4*np.sqrt(6.0/(n_input + n_hidden_1))), name='W1'),
    'w2': tf.Variable(tf.random_uniform([n_hidden_1, n_hidden_2], minval=-4*np.sqrt(6.0/(n_hidden_1 + n_hidden_2)), maxval=4*np.sqrt(6.0/(n_hidden_1 + n_hidden_2))), name='W2'),
    'w3': tf.Variable(tf.random_uniform([n_hidden_2, n_classes], minval=-4*np.sqrt(6.0/(n_hidden_2 + n_classes)), maxval=4*np.sqrt(6.0/(n_hidden_2 + n_classes))), name='W3'),
    'w4': tf.Variable(tf.random_uniform([n_classes, 2], minval=-4*np.sqrt(6.0/(n_classes + 2)), maxval=4*np.sqrt(6.0/(n_classes + 2))), name='W4')
}
biases = {
    'b1': tf.Variable(tf.truncated_normal([n_hidden_1]) / sqrt(n_hidden_1), name='b1'),
    'b2': tf.Variable(tf.truncated_normal([n_hidden_2]) / sqrt(n_hidden_2), name='b2'),
    'b3': tf.Variable(tf.truncated_normal([n_classes]) / sqrt(n_classes), name='b3'),
    'b4': tf.Variable(tf.truncated_normal([2]) / sqrt(2), name='b4')
}
# In[8]:
with tf.name_scope('Model'):
    # Model
    pred_left = tfNN(x_image_left, weights, biases)
    pred_right = tfNN(x_image_right, weights, biases)

with tf.name_scope('Loss'):
    # Minimize error using cross entropy
    # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
    d = tf.reduce_sum(tf.square(pred_left - pred_right), 1)
    d_sqrt = tf.sqrt(d)
    loss = label * tf.square(tf.maximum(0.0, margin - d_sqrt)) + (1 - label) * d
    loss = 0.5 * tf.reduce_mean(loss)

with tf.name_scope('AdamOptimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# In[9]:
# Initializing the variables
init = tf.global_variables_initializer()
# Create a summary to monitor cost tensor
tf.scalar_summary("loss", loss)
# Merge all summaries into a single op
merged_summary_op = tf.merge_all_summaries()
# In[10]:
# Launch the graph
sess = tf.Session()
sess.run(init)
# op to write logs to Tensorboard
summary_writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
# Training cycle
for epoch in range(training_epochs):
    avg_loss = 0.0
    total_batch = int(mnist.train.num_examples / batch_size)
    # Loop over all batches
    for i in range(total_batch):
        left_batch_xs, left_batch_ys = mnist.train.next_batch(batch_size)
        right_batch_xs, right_batch_ys = mnist.train.next_batch(batch_size)
        labels = np.zeros((batch_size, 1))
        for l in range(batch_size):
            if left_batch_ys[l] == right_batch_ys[l]:
                labels[l, 0] = 0.0
            else:
                labels[l, 0] = 1.0
        _, l, summary = sess.run([optimizer, loss, merged_summary_op],
                                 feed_dict={
                                     x_left: left_batch_xs,
                                     x_right: right_batch_xs,
                                     label: labels,
                                 })
        # Write logs at every iteration
        summary_writer.add_summary(summary, epoch * total_batch + i)
        # Compute average loss
        avg_loss += l / total_batch
    # Display logs per epoch step
    if (epoch+1) % display_step == 0:
        print("Epoch:", '%04d' % (epoch+1), "loss =", "{:.9f}".format(avg_loss))

print("Optimization Finished!")
print("Run the command line:\n"
      "--> tensorboard --logdir=./tensorflow_logs "
      "\nThen open http://0.0.0.0:6006/ into your web browser")
# In[11]:
# Test model
# Calculate accuracy
test_xs, test_ys = mnist.train.next_batch(5000)
ans = sess.run([pred_left], feed_dict = { x_left: test_xs})
# In[12]:
ans = ans[0]
# In[13]:
#test_ys
# In[14]:
figure(figsize=(10,10))
# scatter(r[:,0], r[:,1], c=[test_ys[x,:].argmax() for x in range(len(test_ys))])
scatter(ans[:,0], ans[:,1], c=test_ys[:])
I found the issue in my contrastive loss implementation: it requires keep_dims=True in the distance calculation.
Here is the correct version:
with tf.name_scope('Loss'):
    d = tf.reduce_sum(tf.square(tf.sub(pred_left, pred_right)), 1, keep_dims=True)
    d_sqrt = tf.sqrt(d)
    loss = label * tf.square(tf.maximum(0.0, margin - d_sqrt)) + (1 - label) * d
    loss = 0.5 * tf.reduce_mean(loss)
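The reason, as far as I can tell, is a silent broadcast: label has shape [batch, 1], while without keep_dims the reduced distance d has shape [batch], so label * d becomes a [batch, batch] matrix instead of one loss term per pair. A quick NumPy sketch of the shapes (illustrative only):

import numpy as np

batch = 4
label = np.zeros((batch, 1))   # shape (4, 1), as fed through the placeholder
d_flat = np.zeros((batch,))    # reduce_sum(..., 1) without keep_dims -> (4,)
d_kept = np.zeros((batch, 1))  # reduce_sum(..., 1, keep_dims=True)   -> (4, 1)

print((label * d_flat).shape)  # (4, 4)  -- broadcasts to a matrix
print((label * d_kept).shape)  # (4, 1)  -- element-wise per pair, as intended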
Now I get the correct result:

Requesting multiple values from graph at same time

In the code below l2 surprisingly returns the same value as l1, but since the optimizer is being requested in the list before l2, I expected the loss to be the new loss after training. Can I not request multiple values at the same time from the graph and expect consistent output?
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=[None, 10])
y = tf.placeholder(tf.float32, shape=[None, 2])
weight = tf.Variable(tf.random_uniform((10, 2), dtype=tf.float32))
loss = tf.nn.sigmoid_cross_entropy_with_logits(tf.matmul(x, weight), y)
optimizer = tf.train.AdamOptimizer(0.1).minimize(loss)
with tf.Session() as sess:
    tf.initialize_all_variables().run()

    X = np.random.rand(1, 10)
    Y = np.array([[0, 1]])

    # Evaluate loss before running training step
    l1 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
    print(l1)  # 3.32393

    # Running the training step
    _, l2 = sess.run([optimizer, loss], feed_dict={x: X, y: Y})
    print(l2[0][0])  # 3.32393 -- didn't change?

    # Evaluate loss again after training step as sanity check
    l3 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
    print(l3)  # 2.71041
No - the order in which you request them in the list has no effect on the evaluation order. For operations with side effects, such as the optimizer, you need to enforce a specific ordering yourself, using with_dependencies or similar control-flow constructs. In general, ignoring side effects, TensorFlow returns results to you by grabbing the node from the graph as soon as it's computed - and, obviously, the loss is computed before the optimizer, since the optimizer requires the loss as one of its inputs. (Remember that loss is not a variable; it's a tensor, so it's not actually affected by the optimizer step.)
sess.run([loss, optimizer], ...)
and
sess.run([optimizer, loss], ...)
are equivalent.
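To make that concrete with the tensors from the question (x, y, X, Y, loss, optimizer), here is a small sketch:

# Within a single run() call, the loss tensor is evaluated once, from the
# variables as they were when the call started, regardless of fetch order.
l_a, _ = sess.run([loss, optimizer], feed_dict={x: X, y: Y})
_, l_b = sess.run([optimizer, loss], feed_dict={x: X, y: Y})
# Each call returns the pre-update loss for that step; neither ordering
# hands back the loss recomputed from the freshly updated weights.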
As Dave points out, the order of arguments to Session.run() has no effect on the order of evaluation, and the loss tensor in your example does not have a dependency on the optimizer op. To add a dependency, you could use tf.control_dependencies() to add an explicit dependency on the optimizer running before fetching the loss:
with tf.control_dependencies([optimizer]):
    loss_after_optimizer = tf.identity(loss)

_, l2 = sess.run([optimizer, loss_after_optimizer], feed_dict={x: X, y: Y})
I've tested a logistic regression implemented in tensorflow with three ways of calling session.run:
all together
res1, res2, res3 = sess.run([op1, op2, op3])
separately
res1 = sess.run(op1)
res2 = sess.run(op2)
res3 = sess.run(op3)
with dependencies
with tf.control_dependencies([op1]):
    op2_after = tf.identity(op2)
    op3_after = tf.identity(op3)

res1, res2, res3 = session.run([op1, op2_after, op3_after])
With the batch size set to 10000, the result is:
1: 0.05+ secs < 2: 0.11+ secs < 3: 0.25+ secs
The only real difference between 1 and 3 is whether the fetched values come from before or after the update, i.e. a single mini-batch of staleness. It may not be worth using 3 instead of 1.
Here is the test code (it is an LR example written by someone else...).
Here is the data
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 2 13:38:14 2017
#author: inse7en
"""
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import time
pickle_file = '/Users/inse7en/Downloads/notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10
def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
# This is to expedite the process
train_subset = 10000
# This is a good beta value to start with
beta = 0.01
graph = tf.Graph()
with graph.as_default():
    # Input data.
    # They're all constants.
    tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
    tf_train_labels = tf.constant(train_labels[:train_subset])
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables
    # They are variables we want to update and optimize.
    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    logits = tf.matmul(tf_train_dataset, weights) + biases
    # Original loss function
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    # Loss function using L2 Regularization
    regularizer = tf.nn.l2_loss(weights)
    loss = tf.reduce_mean(loss + beta * regularizer)

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 50
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
with tf.Session(graph=graph) as session:
    # This is a one-time operation which ensures the parameters get initialized as
    # we described in the graph: random weights for the matrix, zeros for the
    # biases.
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Run the computations. We tell .run() that we want to run the optimizer,
        # and get the loss value and the training predictions returned as numpy
        # arrays.
        #_, l, predictions = session.run([optimizer, loss, train_prediction])

        start_time = time.time()
        with tf.control_dependencies([optimizer]):
            loss_after_optimizer = tf.identity(loss)
            predictions_after = tf.identity(train_prediction)
            regularizers_after = tf.identity(regularizer)
        _, l, predictions, regularizers = session.run([optimizer, loss_after_optimizer, predictions_after, regularizers_after])
        print("--- with dependencies: %s seconds ---" % (time.time() - start_time))

        #start_time = time.time()
        #opt = session.run(optimizer)
        #l = session.run(loss)
        #predictions = session.run(train_prediction)
        #regularizers = session.run(regularizer)
        #print("--- run separately: %s seconds ---" % (time.time() - start_time))

        #start_time = time.time()
        #_, l, predictions, regularizers = session.run([optimizer, loss, train_prediction, regularizer])
        #print("--- all together: %s seconds ---" % (time.time() - start_time))

        #if (step % 100 == 0):
            #print('Loss at step {}: {}'.format(step, l))
            #print('Training accuracy: {:.1f}'.format(accuracy(predictions,
            #train_labels[:train_subset, :])))
            # Calling .eval() on valid_prediction is basically like calling run(), but
            # just to get that one numpy array. Note that it recomputes all its graph
            # dependencies.
            # You don't have to do .eval above because we already ran the session for the
            # train_prediction
            #print('Validation accuracy: {:.1f}'.format(accuracy(valid_prediction.eval(),
            #valid_labels)))

    #print('Test accuracy: {:.1f}'.format(accuracy(test_prediction.eval(), test_labels)))
    #print(regularizer)