My code is running perfectly with Gradient Descent, but I want to compare the effectiveness of my algorithm using Adam Optimizer, so I tried to modify the following code:
# Softmax regression on MNIST (TensorFlow 1.x graph mode) trained with plain
# gradient descent, logging weights, biases and cost to TensorBoard.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Import MNIST data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Set parameters
learning_rate = 0.01
training_iteration = 30
batch_size = 100
display_step = 2

# TF graph input
x = tf.placeholder("float", [None, 784])  # mnist data image of shape 28*28=784
y = tf.placeholder("float", [None, 10])  # 0-9 digits recognition => 10 classes

# Create a model
# Set model weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

with tf.name_scope("Wx_b") as scope:
    # Construct a linear model followed by a softmax
    model = tf.nn.softmax(tf.matmul(x, W) + b)

# Add summary ops to collect data
w_h = tf.summary.histogram("weights", W)
b_h = tf.summary.histogram("biases", b)

# More name scopes will clean up graph representation
with tf.name_scope("cost_function") as scope:
    # Minimize error using cross entropy.
    # NOTE: tf.log(model) is -inf wherever a softmax output is exactly 0, and
    # 0 * -inf is NaN, so this hand-written cross entropy is not numerically
    # safe once the softmax saturates.
    cost_function = -tf.reduce_sum(y*tf.log(model))
    # Create a summary to monitor the cost function
    tf.summary.scalar("cost_function", cost_function)

with tf.name_scope("train") as scope:
    # Gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

# Initializing the variables
init = tf.global_variables_initializer()

# Merge all summaries into a single operator
merged_summary_op = tf.summary.merge_all()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # `graph_def=` is deprecated; pass the graph itself.
    summary_writer = tf.summary.FileWriter('/home/raed/Tensorflow/tensorflow_demo', graph=sess.graph)

    # The number of batches per pass is constant, so compute it once.
    total_batch = int(mnist.train.num_examples/batch_size)

    # Training cycle
    for iteration in range(training_iteration):
        avg_cost = 0.
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute the average loss
            avg_cost += sess.run(cost_function, feed_dict={x: batch_xs, y: batch_ys})/total_batch
            # Write logs for each iteration
            summary_str = sess.run(merged_summary_op, feed_dict={x: batch_xs, y: batch_ys})
            summary_writer.add_summary(summary_str, iteration*total_batch + i)
        # Display logs per iteration step
        if iteration % display_step == 0:
            print ("Iteration:" "%04d" % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))

    print ("Tuning completed!")
    # Make sure every pending event reaches the log file.
    summary_writer.close()

    # Test the model
    predictions = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(predictions, "float"))
    print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
To use the Adam optimizer, I tried to change the following line:
# Original training op: plain gradient descent on the cost.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
and replace it with AdamOptimizer:
# Replacement training op: Adam with the same learning rate and cost.
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost_function)
When I ran the code, it completed a few iterations and then stopped with the following error:
InvalidArgumentError (see above for traceback): Nan in summary histogram for: weights
[[Node: weights = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](weights/tag, Variable/read)]]
Could you please help me understand the problem? Thanks in advance.
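The problem is that the weights are initialized to zero, W = tf.Variable(tf.zeros([784, 10])), which is why you get NaN weights. Note also that tf.log(model) evaluates to -inf whenever a softmax output reaches 0, which then turns into NaN in the cost and the gradients; computing the loss with tf.nn.softmax_cross_entropy_with_logits on the raw logits avoids this.
You need to initialize them with some initializer, e.g. a normal distribution, as follows: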
# Draw the initial weights from a normal distribution (stddev 0.35) instead of zeros.
W = tf.Variable(tf.random_normal([784, 10], stddev=0.35),
name="weights")
Related
I have the tf.event files present in the folder and I ran the TensorBoard command to view them, but I am still not able to see the graph.
Please find the code below; only the part related to the graph is shown.
I am using TensorFlow 1.8; upgrading caused a lot of issues, so I am staying on the lower version.
# Fragment: set up TensorBoard writers and scalar summaries, then log one
# training step. `cross_entropy`, `accuracy`, `feed_dict_train`, `trainLabels`,
# `batch_size`, `epoch` and `batch` come from code that is not shown here.
with tf.Session() as sess:
    # Initialize the FileWriters (one log directory per run)
    writer = tf.summary.FileWriter("./Training_FileWriter/", sess.graph)
    writer1 = tf.summary.FileWriter("./Validation_FileWriter/", sess.graph)

    # Add the cost and accuracy to summary
    tf.summary.scalar('loss', tf.squeeze(cross_entropy))
    tf.summary.scalar('accuracy', tf.squeeze(accuracy))

    # Merge all summaries together
    merged_summary = tf.summary.merge_all()

    #
    #
    # After executing loss, optimizer, accuracy
    summ = sess.run(merged_summary, feed_dict=feed_dict_train)
    # Global step = batches seen in earlier epochs + index of the current batch.
    writer.add_summary(summ, epoch*int(len(trainLabels)/batch_size) + batch)
Would a full-fledged example like this help? With it, I am able to view the graphs.
tensorboard --logdir=D:\Development_Avecto\TensorFlow\logs\1\train
TensorBoard 1.9.0 at http://LT032871:6006 (Press CTRL+C to quit)
# Complete TensorBoard example: softmax regression on MNIST that logs the
# cost and accuracy for every training batch (TensorFlow 1.x graph mode).
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# reset everything to rerun in jupyter
tf.reset_default_graph()

# config
batch_size = 100
learning_rate = 0.5
training_epochs = 5
logs_path = "D:/Development_Avecto/TensorFlow/logs/1/train"

# load mnist data set
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# input images
with tf.name_scope('input'):
    # None -> batch size can be any size, 784 -> flattened mnist image
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x-input")
    # target 10 output classes
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y-input")

# model parameters will change during training so we use tf.Variable
with tf.name_scope("weights"):
    W = tf.Variable(tf.zeros([784, 10]))

# bias
with tf.name_scope("biases"):
    b = tf.Variable(tf.zeros([10]))

# implement model
with tf.name_scope("softmax"):
    # y is our prediction
    y = tf.nn.softmax(tf.matmul(x, W) + b)

# specify cost function
with tf.name_scope('cross_entropy'):
    # this is our cost
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

# specify optimizer
with tf.name_scope('train'):
    # optimizer is an "operation" which we can execute in a session
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

with tf.name_scope('Accuracy'):
    # Accuracy
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# create a summary for our cost and accuracy
tf.summary.scalar("cost", cross_entropy)
tf.summary.scalar("accuracy", accuracy)

# merge all summaries into a single "operation" which we can execute in a session
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    # variables need to be initialized before we can use them
    # (global_variables_initializer replaces the deprecated initialize_all_variables)
    sess.run(tf.global_variables_initializer())

    # create log writer object
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    # number of batches in one epoch
    batch_count = int(mnist.train.num_examples / batch_size)

    # perform training cycles
    for epoch in range(training_epochs):
        for i in range(batch_count):
            batch_x, batch_y = mnist.train.next_batch(batch_size)

            # perform the operations we defined earlier on batch
            _, summary = sess.run([train_op, summary_op], feed_dict={x: batch_x, y_: batch_y})

            # write log
            writer.add_summary(summary, epoch * batch_count + i)

        if epoch % 5 == 0:
            print("Epoch: ", epoch)

    print("Accuracy: ", accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    print("done")

    # flush pending events so TensorBoard can read the complete log
    writer.close()
I implemented the linear regression model shown on Tensorflow's main page: https://www.tensorflow.org/get_started/get_started
# Linear regression y = W * x + b fitted by gradient descent on the summed
# squared error (TensorFlow 1.x "getting started" example).
import numpy as np
import tensorflow as tf

# Model parameters
# dtype must be passed by keyword: the second positional argument of
# tf.Variable is `trainable`, not the dtype.
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)

# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)

# loss
# NOTE: this is a sum, so the gradient grows with the number and magnitude of
# the samples while the learning rate below stays fixed.
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares

# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training data
x_train = [1,2,3,4]
y_train = [0,-1,-2,-3]

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # reset values to wrong
for i in range(1000):
    sess.run(train, {x:x_train, y:y_train})

# evaluate training accuracy
curr_W, curr_b, curr_loss = sess.run([W, b, loss], {x:x_train, y:y_train})
print("W: %s b: %s loss: %s"%(curr_W, curr_b, curr_loss))
However, when I change the training data to x_train=[2,4,6,8] and y_train=[3,4,5,6],
the loss starts to increase over time until it reaches 'nan'
As suggested by Steven, you should probably use reduce_mean(), which seems to fix the problem of the increasing loss function. Note that I also increased the number of training steps since reduce_mean() appears to need a bit longer to converge. Be careful with increasing the learning rate, since this may reproduce the problem. Instead, if training time is not a critical factor, you might want to decrease the learning rate and increase the number of training iterations further.
With the reduce_sum() function it worked well for me after decreasing the learning rate from 0.01 to 0.001. Again, thanks to Steven for the suggestion.
# Linear regression y = W * x + b fitted by gradient descent on the *mean*
# squared error, which keeps the gradient scale independent of sample count.
import numpy as np
import tensorflow as tf

# Model parameters
# dtype must be passed by keyword: the second positional argument of
# tf.Variable is `trainable`, not the dtype.
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)

# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)

# loss
loss = tf.reduce_mean(tf.square(linear_model - y))  # mean of the squares

# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training data
x_train = [2,4,6,8]
# NOTE(review): the question used y_train = [3,4,5,6]; confirm which targets
# were intended here.
y_train = [0,3,4,5]

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # reset values to wrong
# more steps than the sum-based version: the averaged gradient is smaller
for i in range(5000):
    sess.run(train, {x:x_train, y:y_train})

# evaluate training accuracy
curr_W, curr_b, curr_loss = sess.run([W, b, loss], {x:x_train, y:y_train})
print("W: %s b: %s loss: %s"%(curr_W, curr_b, curr_loss))
The one-dimensional data consists of 80 samples, each of length 1089. I want to use 70 samples for training and 10 samples for testing.
I am a total beginner in Python and TensorFlow, so I am using code that was written for processing images (which are two-dimensional). Here is the code I use (all the parameters are set pretty low because I just want to test the code):
import tensorflow as tf
import scipy.io as sc
from tensorflow.python.ops import rnn, rnn_cell


def _load_mat_variable(name):
    """Read variable `name` from the file `<name>.mat`, print its shape, return it."""
    array = sc.loadmat(name + ".mat")[name]
    print (array.shape)
    return array


# data read: every .mat file stores one variable named after the file
feature_training = _load_mat_variable("feature_training")
feature_testing = _load_mat_variable("feature_testing")
label_training = _load_mat_variable("label_training")
label_testing = _load_mat_variable("label_testing")

# parameters
learning_rate = 0.1
training_iters = 100
batch_size = 70
display_step = 10

# network parameters
n_input = 70  # size of the last axis of the input placeholder
n_steps = 100  # timesteps
n_hidden = 10  # hidden layer num of features
n_classes = 2  # number of output classes

# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])

# Output projection applied to the last LSTM output
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}
def RNN(x, weights, biases):
    """Run an LSTM over `x` and project the last step's output.

    Args:
        x: tensor of shape (batch_size, n_steps, n_input).
        weights: dict whose 'out' entry is the (n_hidden, n_classes) matrix.
        biases: dict whose 'out' entry is the (n_classes,) bias vector.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`, i.e. one row per
        batch element computed from the final timestep only.
    """
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)

# Define loss and optimizer
# Keyword arguments make the logits/labels roles explicit and do not depend
# on the positional order of the function.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        # NOTE: feature_training is the array returned by scipy.io.loadmat,
        # not an MNIST-style dataset object, so it has no next_batch() method;
        # this call is what raises the AttributeError.
        batch_x, batch_y = feature_training.next_batch(batch_size)
        # Reshape the batch to (batch_size, n_steps, n_input)
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            print ("Iter " + str(step*batch_size) + ", Training Accuracy= " +
                   "{:.5f}".format(acc))
        step += 1
    print ("Optimization Finished!")

    # Calculate accuracy for 10 testing data
    test_len = 10
    test_data = feature_testing[:test_len].reshape((-1, n_steps, n_input))
    test_label = label_testing[:test_len]
    print ("Testing Accuracy:",
           sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
In the end, it fails with this error:
Traceback (most recent call last):
File "/home/xiangzhang/MNIST data test.py", line 92, in <module>
batch_x, batch_y = feature_training.next_batch(batch_size)
AttributeError: 'numpy.ndarray' object has no attribute 'next_batch'
I thought it must be related with the dimension of the data, but I do not know how to fix it. Please help me, thanks very much.
I want to adapt this Recurrent Neural Network in TensorFlow (from this tutorial:
https://github.com/aymericdamien/TensorFlow-Examples/
— specifically the RNN program),
so that it will be a denoising autoencoder.
I have 5 time steps, and at each time, the noiseless target is sampled from sin(x), and the noisy input is sin(x)+ Gaussian error.
Now my problem is that the RNN from the example gives me 1 output value for each sequence of inputs, but I want an output for each time step ( I want 5 outputs, not 1)
How do I do this? I suspect it may be a matter of redefining the weights and biases, but how?
Here is the code. Many thanks for your help,
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np

# Parameters
learning_rate = 0.0005
training_iters = 1000
batch_size = 3
display_step = 100

# Network Parameters
n_input = 2
n_output = 2
n_steps = 5  # timesteps
n_hidden = 40  # hidden layer num of features

# tf Graph input: noisy sequence in, clean sequence of the same length out
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_output])

# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_output]))
}

# length of time series to be sampled
N = 1000000
dim_input = 2

# generate data (vectorized; `np.math` no longer exists in current NumPy and
# a one-million-iteration Python loop is needlessly slow)
_t = np.arange(N)
# clean
y1 = np.sin(_t)
y2 = np.cos(_t)
# noisy
x1 = y1 + np.random.normal(loc=0.0, scale=0.05, size=N)
x2 = y2 + np.random.normal(loc=0.0, scale=0.05, size=N)
def next_batch():
    """Sample `batch_size` random windows of `n_steps` consecutive points.

    Returns:
        (batch, batch_y, inits): `batch` holds the noisy inputs and `batch_y`
        the clean targets, both of shape (batch_size, n_steps, dim_input);
        `inits` holds the start index of every window.
    """
    batch = np.empty([batch_size, n_steps, dim_input])
    batch_y = np.empty([batch_size, n_steps, dim_input])
    # for plotting purposes only
    inits = np.empty([batch_size], dtype=int)
    for b in range(batch_size):
        # start index of this window; the upper bound keeps it inside the series
        init = int(np.round(np.random.uniform(low=0, high=N - n_steps - 1)))
        inits[b] = init
        window = slice(init, init + n_steps)
        # noisy input
        batch[b, :, 0] = x1[window]
        batch[b, :, 1] = x2[window]
        # target (no noise)
        batch_y[b, :, 0] = y1[window]
        batch_y[b, :, 1] = y2[window]
    return (batch, batch_y, inits)
def RNN(x, weights, biases):
    """Run an LSTM over `x` and project the last step's output.

    Args:
        x: tensor of shape (batch_size, n_steps, n_input).
        weights: dict whose 'out' entry is the (n_hidden, n_output) matrix.
        biases: dict whose 'out' entry is the (n_output,) bias vector.

    Returns:
        A (batch_size, n_output) tensor built from `outputs[-1]` only, i.e.
        one prediction per sequence rather than one per timestep.
    """
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)


# Define loss and optimizer
# SSE, there must be an easier way to do this
def get_cost(prediction,truth):
    """Sum of squared differences between `prediction` and `truth`.

    NOTE: `pred` has rank 2 (RNN returns only the last timestep) while the
    target placeholder `y` has rank 3 (batch, n_steps, n_input), so indexing
    both with `[i, :]` does not line up; this is where the reported
    "must have rank 2" ValueError comes from.
    """
    z = 0
    for i in range(0,batch_size):
        z = z + np.square(np.add(prediction[i,:], np.multiply(-1,truth[i,:])))
    z = np.add(z[0],z[1])
    z = np.sum(z)
    return(z)


cost = get_cost(pred,y)
# The original split this statement after the trailing dot, which is a
# syntax error; keep the call chain on one line.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
accuracy = cost

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        print('step '+ str(step))
        batch_x, batch_y, inits = next_batch()
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print(loss)
        step += 1
    print("Optimization Finished!")
If I run this, I get this error message:
ValueError: Shape (?, 5, 2) must have rank 2. This seems fair enough, because the target is 5 steps long, and the output only 1. But how do I fix that?
Many thanks.
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
import matplotlib.pyplot as plt

## Denoising autoencoder.
count = 0

# length of time series to be sampled
N = 10000

batch_size = 30
learning_rate = 0.0005
training_iters = 300000
display_step = 100

# Network Parameters
n_input = 2
n_output = 2
n_steps = 15  # timesteps
n_hidden = 75  # hidden layer num of features

# generate data (vectorized; `np.math` no longer exists in current NumPy)
_t = np.arange(N)
# clean
y1 = np.sin(_t)
y2 = np.cos(_t)
# noisy
x1 = y1 + np.random.normal(loc=0.0, scale=0.1, size=N)
x2 = y2 + np.random.normal(loc=0.0, scale=0.1, size=N)
def next_batch():
    """Sample `batch_size` random windows of `n_steps` consecutive points.

    Reads the module-level `batch_size` at call time, so changing it later
    changes the size of the returned batch.

    Returns:
        (batch, batch_y, inits): `batch` holds the noisy inputs and `batch_y`
        the clean targets, both of shape (batch_size, n_steps, n_input);
        `inits` holds the start index of every window.
    """
    batch = np.empty([batch_size, n_steps, n_input])
    batch_y = np.empty([batch_size, n_steps, n_input])
    # for plotting purposes only
    inits = np.empty([batch_size], dtype=int)
    for b in range(batch_size):
        # start index of this window; the upper bound keeps it inside the series
        init = int(np.round(np.random.uniform(low=0, high=N - n_steps - 1)))
        inits[b] = init
        window = slice(init, init + n_steps)
        # noisy input
        batch[b, :, 0] = x1[window]
        batch[b, :, 1] = x2[window]
        # target (no noise)
        batch_y[b, :, 0] = y1[window]
        batch_y[b, :, 1] = y2[window]
    return (batch, batch_y, inits)
# Graph inputs: noisy sequence `x` and clean target sequence `y`
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_output])
# N minus 500 samples; not referenced again in the code shown here
N_train = N - 500
def RNN(x):
    """Run a projected LSTM over `x` and return the output of every timestep.

    Args:
        x: tensor of shape (batch_size, n_steps, n_input).

    Returns:
        A list of `n_steps` tensors, one per timestep. `num_proj` makes the
        cell project each output down to `n_output` values, so no separate
        output layer is applied.
    """
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.LSTMCell(num_units=n_hidden, forget_bias=1.0, num_proj=n_output)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    return outputs
# Print the input placeholder, then build the per-timestep prediction tensors.
print(x)
pred = RNN(x)
# Define loss and optimizer
def get_cost(prediction,truth):
    """Sum of squared errors between per-step predictions and the targets.

    Args:
        prediction: sequence of `n_steps` items, each indexable as
            `[batch_index, output_dim]`.
        truth: indexable as `[batch_index, step, output_dim]`.

    Returns:
        The accumulated squared error over all steps, the first `batch_size`
        batch elements (module-level value at call time) and all `n_output`
        dimensions. Only indexing and arithmetic are used, so it is called
        both on graph tensors and on fetched arrays.
    """
    # SSE. there must be an easier way than this:
    z = 0
    for step in range(0,n_steps):
        for b in range(0,batch_size):
            for y_dim in range(0,n_output):
                d1 = prediction[step][b,y_dim]
                d2 = truth[b,step,y_dim]
                diff = (d1 - d2)
                z = z + diff * diff
    return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y, inits = next_batch()
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print(str(step) + ':' + str(loss))
        step += 1
    print("Optimization Finished!")

    # Evaluate on a single fresh window: next_batch() and get_cost() both
    # read the module-level batch_size, so shrink it before sampling.
    batch_size = 1
    test_data, test_label, inits = next_batch()
    p2 = sess.run(pred, feed_dict={x: test_data, y: test_label})
    print('---batch---')
    print(test_data)
    print('---truth---')
    print(test_label)
    print('---pred---')
    print(p2)
    # Squared error of the fetched predictions on the test window
    c_final = get_cost(p2, test_label)
    print(c_final)
First, we generate some data: a 2-dimensional series of sin(i) and cos(i), with i running from 0 to N-1. This gives us the variable y. Then we add some Normal noise to this series, and that's x. Then, we train a Recurrent Neural Net to create the clean output from the noisy input. In other words, we train the net such that it will output [sin(i), cos(i)] from input [sin(i)+e1, cos(i)+e2]. This is a plain vanilla denoising autoencoder, except that the data has a time element. Now you can feed new data into the neural net, and it will hopefully remove the noise.
In the code below l2 surprisingly returns the same value as l1, but since the optimizer is being requested in the list before l2, I expected the loss to be the new loss after training. Can I not request multiple values at the same time from the graph and expect consistent output?
# Demonstrates that the loss fetched together with the optimizer op is the
# loss from *before* the update.
import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.float32, shape=[None, 10])
y = tf.placeholder(tf.float32, shape=[None, 2])
weight = tf.Variable(tf.random_uniform((10, 2), dtype=tf.float32))
loss = tf.nn.sigmoid_cross_entropy_with_logits(tf.matmul(x, weight), y)
optimizer = tf.train.AdamOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    tf.initialize_all_variables().run()

    X = np.random.rand(1, 10)
    Y = np.array([[0, 1]])

    # Evaluate loss before running training step
    l1 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
    print(l1)  # 3.32393

    # Running the training step
    _, l2 = sess.run([optimizer, loss], feed_dict={x: X, y: Y})
    print(l2[0][0])  # 3.32393 -- didn't change?

    # Evaluate loss again after training step as sanity check
    l3 = sess.run([loss], feed_dict={x: X, y: Y})[0][0][0]
    print(l3)  # 2.71041
No - the order in which you request them in the list has no effect on the evaluation order. For side-effect-having operations such as the optimizer, if you want to guarantee a specific ordering, you need to enforce it using with_dependencies or similar control-flow constructs. In general, ignoring side-effects, TensorFlow will return results to you by grabbing the node from the graph as soon as it's computed - and, obviously, the loss is computed before the optimizer, since the optimizer requires the loss as one of its input. (Remember that 'loss' is not a variable; it's a tensor; so it's not actually affected by the optimizer step.)
# Fetch list order: loss first, optimizer second.
sess.run([loss, optimizer], ...)
and
# Fetch list order: optimizer first, loss second.
sess.run([optimizer, loss], ...)
are equivalent.
As Dave points out, the order of arguments to Session.run() has no effect on the order of evaluation, and the loss tensor in your example does not have a dependency on the optimizer op. To add a dependency, you could use tf.control_dependencies() to add an explicit dependency on the optimizer running before fetching the loss:
# Ops created inside this block get an explicit control dependency on the
# optimizer op, so the identity below only runs after the optimizer has run.
with tf.control_dependencies([optimizer]):
    loss_after_optimizer = tf.identity(loss)

_, l2 = sess.run([optimizer, loss_after_optimizer], feed_dict={x: X, y: Y})
I've tested logistic regression implemented in tensorflow with three ways of session.run:
all together
# One sess.run call fetching all three ops.
res1, res2, res3 = sess.run([op1, op2, op3])
separately
# Three sess.run calls, one per op.
res1 = sess.run(op1)
res2 = sess.run(op2)
res3 = sess.run(op3)
with dependencies
# Wrap op2 and op3 (not op1 again) so that their fetched copies carry a
# control dependency on op1.
with tf.control_dependencies([op1]):
    op2_after = tf.identity(op2)
    op3_after = tf.identity(op3)

# `sess`, as in the two variants above (the original mixed in `session`).
res1, res2, res3 = sess.run([op1, op2_after, op3_after])
With the batch size set to 10000, the results are:
1: 0.05+ secs < 2: 0.11+ secs < 3: 0.25+ secs
The main difference between 1 and 3 is only one mini-batch. It may not be worth using 3 instead of 1.
Here is the test code (it is an LR example written by someone else...).
Here is the data
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 2 13:38:14 2017
#author: inse7en
"""
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import time
pickle_file = '/Users/inse7en/Downloads/notMNIST.pickle'

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file;
# only load pickles that you created yourself or fully trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10


def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels.

    Args:
        dataset: array reshapeable to (-1, image_size * image_size).
        labels: 1-D array of integer class ids.

    Returns:
        (dataset, labels): float32 arrays of shape
        (n, image_size * image_size) and (n, num_labels).
    """
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]: comparing the
    # row vector 0..num_labels-1 with the label column broadcasts to one-hot rows.
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
# Flatten / one-hot encode every split, then report the resulting shapes.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

for _split_name, _images, _targets in (
        ('Training set', train_dataset, train_labels),
        ('Validation set', valid_dataset, valid_labels),
        ('Test set', test_dataset, test_labels)):
    print(_split_name, _images.shape, _targets.shape)

# Train on a subset only, to expedite the process
train_subset = 10000

# L2 regularization strength; a good beta value to start with
beta = 0.01
# Build the logistic-regression graph with L2 regularization.
graph = tf.Graph()
with graph.as_default():
    # Input data.
    # They're all constants.
    tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
    tf_train_labels = tf.constant(train_labels[:train_subset])
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables
    # They are variables we want to update and optimize.
    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    logits = tf.matmul(tf_train_dataset, weights) + biases

    # Original loss function
    # Keyword arguments make the logits/labels roles explicit and do not
    # depend on the positional order of the function.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    # Loss function using L2 Regularization
    regularizer = tf.nn.l2_loss(weights)
    loss = tf.reduce_mean(loss + beta * regularizer)

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

num_steps = 50
def accuracy(predictions, labels):
    """Return the percentage of rows whose argmax matches in both arrays.

    Args:
        predictions: 2-D array of per-class scores, one row per sample.
        labels: 2-D one-hot array with the same number of rows.

    Returns:
        The share of matching rows, as a float between 0.0 and 100.0.
    """
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
with tf.Session(graph=graph) as session:
    # This is a one-time operation which ensures the parameters get initialized as
    # we described in the graph: random weights for the matrix, zeros for the
    # biases.
    tf.initialize_all_variables().run()
    print('Initialized')

    # Create the dependency-wrapped fetches ONCE. Building them inside the
    # loop adds new nodes to the graph on every step, so the graph keeps
    # growing and the per-step timing also measures graph construction.
    with tf.control_dependencies([optimizer]):
        loss_after_optimizer = tf.identity(loss)
        predictions_after = tf.identity(train_prediction)
        regularizers_after = tf.identity(regularizer)

    for step in range(num_steps):
        # Run the computations. We tell .run() that we want to run the optimizer,
        # and get the loss value and the training predictions returned as numpy
        # arrays.
        #_, l, predictions = session.run([optimizer, loss, train_prediction])

        # Variant 3: fetches that carry a control dependency on the optimizer
        start_time = time.time()
        _, l, predictions, regularizers = session.run(
            [optimizer, loss_after_optimizer, predictions_after, regularizers_after])
        print("--- with dependencies: %s seconds ---" % (time.time() - start_time))

        # Variant 2: one session.run per op
        #start_time = time.time()
        #opt = session.run(optimizer)
        #l = session.run(loss)
        #predictions = session.run(train_prediction)
        #regularizers = session.run(regularizer)
        #print("--- run separately: %s seconds ---" % (time.time() - start_time))

        # Variant 1: everything in a single session.run
        #start_time = time.time()
        #_, l, predictions,regularizers = session.run([optimizer, loss, train_prediction, regularizer])
        #print("--- all together: %s seconds ---" % (time.time() - start_time))

        #if (step % 100 == 0):
            #print('Loss at step {}: {}'.format(step, l))
            #print('Training accuracy: {:.1f}'.format(accuracy(predictions,
            #train_labels[:train_subset, :])))
            # Calling .eval() on valid_prediction is basically like calling run(), but
            # just to get that one numpy array. Note that it recomputes all its graph
            # dependencies.
            # You don't have to do .eval above because we already ran the session for the
            # train_prediction
            #print('Validation accuracy: {:.1f}'.format(accuracy(valid_prediction.eval(),
            #valid_labels)))
    #print('Test accuracy: {:.1f}'.format(accuracy(test_prediction.eval(), test_labels)))
    #print(regularizer)