Tensorflow while_loop for training - tensorflow

In my problem I need to run gradient descent (GD) with one example from the data on each training step. It's a known problem that session.run() has overhead, so training the model this way takes too long.
In an attempt to avoid the overhead, I tried to use while_loop and train the model on all the data with a single run() call. But this approach doesn't work, and train_op doesn't execute even once. Below is a simple example of what I'm doing:
# Toy dataset: the ten floats 0.0 .. 9.0, used as both inputs and targets.
data = [k*1. for k in range(10)]

tf.reset_default_graph()

# Counter variable passed to tf.while_loop as its loop variable.
i = tf.Variable(0, name='loop_i')

# One queue per component; each dequeue() yields a single example.
q_x = tf.FIFOQueue(100000, tf.float32)
q_y = tf.FIFOQueue(100000, tf.float32)
x = q_x.dequeue()
y = q_y.dequeue()

# Linear model w * x + b with a squared-error loss on one example.
w = tf.Variable(0.)
b = tf.Variable(0.)
loss = (tf.add(tf.mul(x, w), b) - y)**2

# Step counter incremented by minimize().
gs = tf.Variable(0)
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(loss, global_step=gs)

s = tf.Session()
s.run(tf.initialize_all_variables())


def cond(i):
    """Keep looping while the counter is below 10."""
    return i < 10


def body(i):
    """Return the incremented counter with `train_op` as a control input."""
    return tf.tuple([tf.add(i, 1)], control_inputs=[train_op])


loop = tf.while_loop(cond, body, [i])

for _ in range(1):
    # Fill both queues with the same ten values, then run the loop once.
    s.run(q_x.enqueue_many((data, )))
    s.run(q_y.enqueue_many((data, )))
    s.run(loop)
s.close()
What am I doing wrong? Or is there another solution to this problem of overly expensive overhead?
Thanks!

The reason the model does not appear to train is that the input reading, gradient calculation, and the minimize() call are all defined outside (and hence, in dataflow terms, before) the body of the tf.while_loop(). This means that all of these parts of the model run only once, before the loop executes, and the loop itself has no effect.
A slight refactoring—to move the dequeue() operations, gradient calculation, and minimize() call inside the loop—fixes the problem and allows your program to train:
# Relies on q_x, q_y, w, b, gs and i being defined beforehand.
# Create the optimizer once; its update ops are built inside the loop body.
optimizer = tf.train.GradientDescentOptimizer(0.05)


def cond(i):
    """Continue while fewer than 10 iterations have run."""
    return i < 10


def body(i):
    """Run one gradient-descent step on a freshly dequeued example.

    Returns the incremented counter, gated on the training update.
    """
    # Dequeue a new example each iteration.
    x = q_x.dequeue()
    y = q_y.dequeue()
    # Compute the loss and gradient update based on the current example.
    loss = (tf.add(tf.mul(x, w), b) - y)**2
    train_op = optimizer.minimize(loss, global_step=gs)
    # Ensure that the update is applied before continuing.
    return tf.tuple([tf.add(i, 1)], control_inputs=[train_op])


loop = tf.while_loop(cond, body, [i])
UPDATE: Here's a complete program that executes the while loop, based on the code in your question:
import tensorflow as tf
# Define a single queue with two components to store the input data.
q_data = tf.FIFOQueue(100000, [tf.float32, tf.float32])

# We will use these placeholders to enqueue input data.
placeholder_x = tf.placeholder(tf.float32, shape=[None])
placeholder_y = tf.placeholder(tf.float32, shape=[None])
enqueue_data_op = q_data.enqueue_many([placeholder_x, placeholder_y])

# Step counter and the two parameters of the linear model w * x + b.
gs = tf.Variable(0)
w = tf.Variable(0.)
b = tf.Variable(0.)
optimizer = tf.train.GradientDescentOptimizer(0.05)


# Construct the while loop.
def cond(i):
    """Continue while fewer than 10 iterations have run."""
    return i < 10


def body(i):
    """Train on one dequeued example and return the incremented counter."""
    # Dequeue a single new example each iteration.
    x, y = q_data.dequeue()
    # Compute the loss and gradient update based on the current example.
    loss = (tf.add(tf.multiply(x, w), b) - y) ** 2
    train_op = optimizer.minimize(loss, global_step=gs)
    # Ensure that the update is applied before continuing.
    with tf.control_dependencies([train_op]):
        return i + 1


loop = tf.while_loop(cond, body, [tf.constant(0)])

data = [k * 1. for k in range(10)]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1):
        # NOTE: Constructing the enqueue op ahead of time avoids adding
        # (potentially many) copies of `data` to the graph.
        sess.run(enqueue_data_op,
                 feed_dict={placeholder_x: data, placeholder_y: data})
    print(sess.run([gs, w, b]))  # Prints before-loop values.
    sess.run(loop)
    print(sess.run([gs, w, b]))  # Prints after-loop values.

Related

Problem with tensorflow initialization when it gets encapsulated

I am encapsulating an autoencoder cost calculation so that it can be used with swarm algorithms. The goal is to get a cost summary of the autoencoder by passing in a few parameters, so the method creates a model, trains it, and returns the value of its cost tensor:
def getAECost(dfnormalized, adamParam, iterations):
    """Build a tied-weight autoencoder, train it and return its final cost.

    Args:
        dfnormalized: Data fed to the `X` placeholder on every step; assumed
            to have N_VISIBLE (31) columns -- TODO confirm against caller.
        adamParam: Learning rate fed to the Adam optimizer.
        iterations: Number of training steps; one extra step is always run.

    Returns:
        The sum-of-squared-errors cost evaluated on `dfnormalized` after the
        last training step.
    """
    N_VISIBLE = 31
    N_HIDDEN = 20
    DEVICE = '/gpu:0'  # Or '/cpu:0'
    ITERATIONS = 1 + iterations
    with tf.device(DEVICE):
        # Input placeholder: any number of rows, N_VISIBLE columns.
        X = tf.placeholder("float", [None, N_VISIBLE], name='X')
        # create nodes for hidden variables
        # NOTE: W_init_max is currently unused because the minval/maxval
        # bounds below are commented out.
        W_init_max = 4 * np.sqrt(6. / (N_VISIBLE + N_HIDDEN))
        W_init = tf.random_uniform(shape=[N_VISIBLE, N_HIDDEN])#,
        #                            minval=-W_init_max,
        #                            maxval=W_init_max)
        # Initialize the weights and biases.
        # W has shape [N_VISIBLE, N_HIDDEN].
        W = tf.Variable(W_init, name='W')
        # Bias of the hidden layer.
        b = tf.Variable(tf.zeros([N_HIDDEN]), name='b')
        # W_prime has shape [N_HIDDEN, N_VISIBLE].
        W_prime = tf.transpose(W)  # tied weights between encoder and decoder
        b_prime = tf.Variable(tf.zeros([N_VISIBLE]), name='b_prime')

        # Forward pass of the network, parameterised by the variables above.
        def model(X, W, b, W_prime, b_prime):
            tilde_X = X
            # Encode the input into the hidden state.
            Y = tf.nn.sigmoid(tf.matmul(tilde_X, W) + b)  # hidden state
            # Decode the hidden state back into input space.
            Z = tf.nn.sigmoid(tf.matmul(Y, W_prime) + b_prime)  # reconstructed input
            return Z

        # build model graph
        pred = model(X, W, b, W_prime, b_prime)
        # create cost function
        # Sum of squared error
        cost = tf.reduce_sum(tf.pow(X - pred, 2))  # minimize squared error
        # Placeholder through which the learning rate is fed.
        learning = tf.placeholder("float", name='learning')
        train_op = tf.train.AdamOptimizer(learning).minimize(cost)  # construct an optimizer
    with tf.Session() as sess:
        # you need to initialize all variables
        tf.global_variables_initializer()
        RATIO = adamParam
        for i in range(ITERATIONS):
            # The whole dataset is used as the batch on every step.
            input_ = dfnormalized
            # train autoencoder
            sess.run(train_op, feed_dict={X: input_, learning: RATIO})
            # On the last step, evaluate the cost on the same data.
            if(i == ITERATIONS-1):
                costAE = sess.run(cost, feed_dict={X: input_})
    return costAE
It worked a few days ago (maybe I had another session running in the background), with the method returning a float, but now it no longer works and fails with this initialization error:
FailedPreconditionError: Attempting to use uninitialized value W
[[{{node W/read}}]]
in the training step
sess.run(train_op, feed_dict={X: input_, learning: RATIO})
Any advice on how this initialization problem can be solved, or on how I can encapsulate a TensorFlow model and session?
Thanks
You have to actually run the variables initializer: tf.global_variables_initializer() returns an op to be executed; it does not run the initialization for you. So the solution to your problem should be to replace the line
tf.global_variables_initializer()
with
sess.run(tf.global_variables_initializer())
I have tried what @Addy suggested and restructured the code to make it more legible, and now it works perfectly:
class Model:
    """Tied-weight autoencoder built once, when the class body executes.

    The graph, the optimizer and an initialized InteractiveSession are all
    class attributes, so every instance shares the same variables and session.
    """

    N_VISIBLE = 31
    N_HIDDEN = 20
    DEVICE = '/gpu:0'  # Or '/cpu:0'
    with tf.device(DEVICE):
        # Input placeholder: any number of rows, N_VISIBLE columns.
        X = tf.placeholder("float", [None, N_VISIBLE], name='X')
        # create nodes for hidden variables
        # NOTE: W_init_max is currently unused because the minval/maxval
        # bounds below are commented out.
        W_init_max = 4 * np.sqrt(6. / (N_VISIBLE + N_HIDDEN))
        W_init = tf.random_uniform(shape=[N_VISIBLE, N_HIDDEN])#,
        #                            minval=-W_init_max,
        #                            maxval=W_init_max)
        # Initialize the weights and biases.
        # W has shape [N_VISIBLE, N_HIDDEN].
        W = tf.Variable(W_init, name='W')
        # Bias of the hidden layer.
        b = tf.Variable(tf.zeros([N_HIDDEN]), name='b')
        # W_prime has shape [N_HIDDEN, N_VISIBLE].
        W_prime = tf.transpose(W)  # tied weights between encoder and decoder
        b_prime = tf.Variable(tf.zeros([N_VISIBLE]), name='b_prime')

        # Forward pass of the network, parameterised by the variables above.
        def model(X, W, b, W_prime, b_prime):
            tilde_X = X
            # Encode the input into the hidden state.
            Y = tf.nn.sigmoid(tf.matmul(tilde_X, W) + b)  # hidden state
            # Decode the hidden state back into input space.
            Z = tf.nn.sigmoid(tf.matmul(Y, W_prime) + b_prime)  # reconstructed input
            return Z

        # build model graph
        pred = model(X, W, b, W_prime, b_prime)
        # create cost function
        # Sum of squared error
        cost = tf.reduce_sum(tf.pow(X - pred, 2))  # minimize squared error
        # Placeholder through which the learning rate is fed.
        learning = tf.placeholder("float", name='learning')
        train_op = tf.train.AdamOptimizer(learning).minimize(cost)  # construct an optimizer
    sess = tf.InteractiveSession()
    # The initializer op must be run explicitly; creating it is not enough.
    sess.run(tf.global_variables_initializer())

    def train(self, data, adamParam, iterations):
        """Run `iterations` Adam steps on `data` with learning rate `adamParam`."""
        for _ in range(iterations):
            # train autoencoder
            self.sess.run(self.train_op,
                          feed_dict={self.X: data, self.learning: adamParam})

    def getAECost(self, data):
        """Return the sum-of-squared-errors cost evaluated on `data`."""
        return self.sess.run(self.cost, {self.X: data})

    def trainAndGetCost(self, dataTrain, dataCost, adamParam, iterations):
        """Train on `dataTrain`, then return the cost evaluated on `dataCost`."""
        self.train(dataTrain, adamParam, iterations)
        return self.getAECost(dataCost)

Tensorflow Coding Optimization: How Do I implement this kind code more efficiently?

I have an outer loop that trains my model and updates x. However, each iteration of the outer loop requires training another (inner) model, and training the inner model requires the value of x from the current outer iteration.
The general framework is as follows:
# Pseudo-code: `...` stands for code elided by the author.
with tf.Session() as sess:
    # do some initial computation
    x = ...
    for i in range(iters):
        # A new loss and optimizer are added to the graph on every outer
        # iteration.
        loss = func(x)  # compute the loss function
        train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
        sess.run(tf.global_variables_initializer())
        for j in range(train_steps):
            per_loss = sess.run([loss])
            sess.run([train_op])
        # update x
        x = ...
This implementation is very slow, so I decided to use a placeholder:
# Pseudo-code: `...` stands for code elided by the author.
# Build the loss and the optimizer once, fed through a placeholder.
x_placeholder = tf.placeholder(tf.float64, ...)
loss = func(x_placeholder)
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
with tf.Session() as sess:
    # do some initial computation
    x = ...
    for i in range(iters):
        # Re-initialize the variables at the start of every outer iteration.
        sess.run(tf.global_variables_initializer())
        for j in range(train_steps):
            per_loss = sess.run([train_op, loss], feed_dict={x_placeholder: x})
        # update x
        x = ...
However, this gives me the following error:
raise ValueError("No variables to optimize.")
ValueError: No variables to optimize.
when running the line train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
So I'm not sure how to implement this correctly and efficiently. Any ideas?
Thanks
Something like this?
# Pseudo-code: `...` stands for code elided by the author.
# Build the graph once: placeholder, loss, optimizer and initializer op.
x_placeholder = tf.placeholder(tf.float64, ...)
loss = func(x_placeholder)
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
# Creating the initializer inside the loop would add a new op to the graph on
# every outer iteration, so create it once and only re-run it.
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # do some initial computation
    x = ...
    for i in range(iters):
        # Reset the variables before retraining for the current x.
        sess.run(init_op)
        for j in range(train_steps):
            per_loss = sess.run([train_op, loss], feed_dict={x_placeholder: x})
        # update x
        x = ...

Tensorflow value error: Variable already exists, disallowed

I am predicting financial time series over different time periods using TensorFlow. To divide the input data, I made sub-samples and used a for loop.
However, I got a ValueError like this:
ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
Without the sub-samples, this code works well.
Below is my code:
import tensorflow as tf
import numpy as np
import matplotlib
import os
import matplotlib.pyplot as plt
class lstm:
    """LSTM regressor that builds, trains and evaluates its graph per call."""

    def __init__(self, x, y):
        """Store the hyper-parameters; the input width is taken from `x`.

        `y` is accepted but not used here.
        """
        # train Parameters
        self.seq_length = 50
        self.data_dim = x.shape[1]
        self.hidden_dim = self.data_dim*2
        self.output_dim = 1
        self.learning_rate = 0.0001
        self.iterations = 5  # originally 500

    def model(self, x, y):
        """Build the LSTM graph, train it on `x`/`y` and return predictions.

        Windows of `seq_length` consecutive rows of `x` are paired with the
        `y` value that follows each window. The first ~79.77% of the windows
        are used for training and the rest for testing.

        Returns:
            A `(train_predict, test_predict)` tuple of network outputs.
        """
        # build a dataset
        dataX = []
        dataY = []
        for i in range(0, len(y) - self.seq_length):
            _x = x[i:i + self.seq_length]
            _y = y[i + self.seq_length]
            dataX.append(_x)
            dataY.append(_y)
        train_size = int(len(dataY) * 0.7977)
        test_size = len(dataY) - train_size
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        print(train_size, test_size)
        # input place holders
        X = tf.placeholder(tf.float32, [None, self.seq_length, self.data_dim])
        Y = tf.placeholder(tf.float32, [None, 1])
        # build a LSTM network
        # These calls create new graph variables each time model() runs.
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.hidden_dim, state_is_tuple=True, activation=tf.tanh)
        outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
        # We use the last cell's output
        self.Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], self.output_dim, activation_fn=None)
        # cost/loss
        loss = tf.reduce_sum(tf.square(self.Y_pred - Y))  # sum of the squares
        # optimizer
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train = optimizer.minimize(loss)
        # RMSE (defined in the graph but not evaluated in this method)
        targets = tf.placeholder(tf.float32, [None, 1])
        predictions = tf.placeholder(tf.float32, [None, 1])
        rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
        # training
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            # Training step
            for i in range(self.iterations):
                _, step_loss = sess.run([train, loss], feed_dict={X: trainX, Y: trainY})
            # prediction
            train_predict = sess.run(self.Y_pred, feed_dict={X: trainX})
            test_predict = sess.run(self.Y_pred, feed_dict={X: testX})
        return train_predict, test_predict
# variables definition
tsx = []  # per-sub-sample inputs
tsy = []  # per-sub-sample targets
tsr = []
trp = []  # per-sub-sample train predictions
tep = []  # per-sub-sample test predictions
x = np.loadtxt('data.csv', delimiter=',')  # data for analysis
y = x[:,[-1]]  # targets: the last column of x, kept 2-D
z = np.loadtxt('rb.csv', delimiter=',')  # data for time series
z1 = z[:,0]  # start cell
z2 = z[:,1]  # end cell
for i in range(1):  # need to change to len(z)
    # Each sub-sample is also published as a module-level name
    # (x_<i>, y_<i>, a_<i>, trp_<i>, tep_<i>) via globals().
    globals()['x_%s' % i] = x[int(z1[i]):int(z2[i]),:]  # definition of x
    tsx.append(globals()["x_%s" % i])
    # Targets are shifted one row ahead of the inputs.
    globals()['y_%s' % i] = y[int(z1[i])+1:int(z2[i])+1,:]  # definition of y
    tsy.append(globals()["y_%s" % i])
    globals()['a_%s' % i] = lstm(tsx[i],tsy[i])  # definition of class
    globals()['trp_%s' % i],globals()['tep_%s' % i] = globals()["a_%s" % i].model(tsx[i],tsy[i])
    trp.append(globals()["trp_%s" % i])
    tep.append(globals()["tep_%s" % i])
Every time the model method is called, you are building the computational graph of your LSTM. The second time the model method is called, TensorFlow discovers that you have already created variables with the same name. If the reuse flag of the scope in which the variables are created is set to False, a ValueError is raised.
To solve this problem, you have to set the reuse flag to True by calling tf.get_variable_scope().reuse_variables() at the end of your loop.
Note that you can't add this in the beginning of your loop, because then you are trying to reuse variables that have not yet been created.
You find more info in the tensorflow docs here
You define some variables in the "model" function.
Try this when you want to call "model" function multiple times:
# First call: the variables are created under the "model_fn" scope.
with tf.variable_scope("model_fn") as scope:
    train_predict, test_predict = model(input1)
# Later calls: re-enter the same scope with reuse=True so that the existing
# variables are looked up instead of being created again.
with tf.variable_scope(scope, reuse=True):
    train_predict, test_predict = model(input2)

Set value of loss function when calculating/applying gradients

I am using TensorFlow as a part of a larger system where I want to apply the gradient updates in batches. Ideally I'd like to do something along the lines of (in pseudo-code):
# Pseudo-code sketching the desired workflow; not runnable as written.
grads_and_vars = tf.gradients(loss, [vars])
list_of_losses = [2, 1, 3, ...]
for loss_vals in list_of_losses:
    tf.apply_gradients(grads_and_vars, feed_dict={loss: loss_vals})
My loss function depends on earlier predictions from my neural network and takes a long time to compute, hence my need for this.
When you call tf.gradients, the argument grad_ys lets you specify custom values from the upstream backprop graph. If you don't specify them, you end up with a node that assumes that the upstream backprop is a tensor of 1's (a Fill node). So you could either call tf.gradients with a placeholder that lets you specify custom upstream values, or just feed the Fill node.
For example:
tf.reset_default_graph()
a = tf.constant(2.)
b = tf.square(a)
grads = tf.gradients(b, [a])
# A session is needed to evaluate the gradients.
with tf.Session() as sess:
    # Override the default upstream gradient (a Fill node of ones) by feeding
    # it directly by name.
    sess.run(grads, feed_dict={"gradients/Fill:0": 0})
(Posted on behalf of the OP.)
Thanks for your suggestions Yaroslav! Below is the code I put together based on your suggestions. I think this solves my problem:
tf.reset_default_graph()
with tf.Session() as sess:
    # Single-input sigmoid unit: pred = sigmoid(X * W + b).
    X = tf.placeholder("float", name="X")
    W = tf.Variable(1.0, name="weight")
    b = tf.Variable(0.5, name="bias")
    pred = tf.sigmoid(tf.add(tf.multiply(X, W), b))
    opt = tf.train.AdagradOptimizer(1.0)
    # grad_ys supplies the upstream gradient value in place of the default.
    gvs = tf.gradients(pred, [W, b], grad_ys=0.5)
    # Pair each gradient with its variable, as apply_gradients expects.
    train_step = opt.apply_gradients(zip(gvs, [W, b]))
    # .run() uses the default session installed by the enclosing `with`.
    tf.global_variables_initializer().run()
    for i in range(50):
        val, _ = sess.run([pred, train_step], feed_dict={X: 2})
        print(val)

how to restore the learning rate in TF from previously saved checkpoint ?

I stopped training at some point and saved the checkpoint, meta files, etc.
Now, when I resume training, I want to start with the last learning rate used by the optimizer. Can you provide an example of doing so?
For those coming here (like me) wondering whether the last learning rate is automatically restored: tf.train.exponential_decay doesn't add any Variables to the graph, it only adds the operations necessary to derive the correct current learning rate value given a certain global_step value. This way, you only need to checkpoint the global_step value (which is done by default normally) and, assuming you keep the same initial learning rate, decay steps and decay factor, you'll automatically pick up training where you left it, with the correct learning rate value.
Inspecting the checkpoint won't show any learning_rate variable (or similar), simply because there is no need for any.
This example code learns to add two numbers:
import tensorflow as tf
import numpy as np
import os
def has_checkpoint(ckpt_dir, prefix):
    """Return True if `ckpt_dir` holds at least one file starting with `prefix`."""
    # any() is required here: a bare list of booleans is truthy whenever the
    # directory is non-empty, even if no entry matches the prefix.
    return any(fname.startswith(prefix) for fname in os.listdir(ckpt_dir))


save_ckpt_dir = './add_ckpt'
ckpt_filename = 'add.ckpt'
save_ckpt_path = os.path.join(save_ckpt_dir, ckpt_filename)
# Create the checkpoint directory if it is missing.
os.makedirs(save_ckpt_dir, exist_ok=True)
if has_checkpoint(save_ckpt_dir, ckpt_filename):  # prefer to load pre-trained net
    load_ckpt_path = save_ckpt_path
else:
    load_ckpt_path = None  # train from scratch
def add_layer(inputs, in_size, out_size, activation_fn=None):
    """Add a fully connected layer and return its output tensor.

    Args:
        inputs: Tensor multiplied by the weights; assumed to have `in_size`
            columns -- TODO confirm against caller.
        in_size: Number of rows of the weight matrix.
        out_size: Number of columns of the weight matrix and of the bias.
        activation_fn: Optional callable applied to `inputs @ W + b`; when
            None the layer is linear.

    Returns:
        The (optionally activated) affine transform of `inputs`.
    """
    # Weights start at ones and biases at zeros.
    Weights = tf.Variable(tf.ones([in_size, out_size]), name='Weights')
    biases = tf.Variable(tf.zeros([1, out_size]), name='biases')
    Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
    if activation_fn is None:
        layer_output = Wx_plus_b
    else:
        layer_output = activation_fn(Wx_plus_b)
    return layer_output
def produce_batch(batch_size=256):
    """Generate a single random batch of addition exercises.

    Args:
        batch_size: The number of exercises in the batch.

    Returns:
        x : column vector of shape (batch_size, 1), uniform in [0, 10)
        y : another column vector drawn the same way
        xy_sum : the element-wise sum of the two columns
    """
    x = np.random.random(size=[batch_size, 1]) * 10
    y = np.random.random(size=[batch_size, 1]) * 10
    xy_sum = x + y
    return x, y, xy_sum
# Placeholders for the two addends; each is a column of shape [batch, 1].
with tf.name_scope("inputs"):
    xs = tf.placeholder(tf.float32, [None, 1])
    ys = tf.placeholder(tf.float32, [None, 1])
# Placeholder for the expected sums.
with tf.name_scope("correct_labels"):
    xysums = tf.placeholder(tf.float32, [None, 1])
with tf.name_scope("step_and_learning_rate"):
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(0.15, global_step, 10, 0.96)  # start lr=0.15, decay every 10 steps with a base of 0.96
# A single linear layer over the concatenated inputs (2 in, 1 out).
with tf.name_scope("graph_body"):
    prediction = add_layer(tf.concat([xs, ys], 1), 2, 1, activation_fn=None)
with tf.name_scope("loss_and_train"):
    # the error between prediction and real data
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(xysums-prediction), reduction_indices=[1]))
    # Passing global_step to minimize() will increment it at each step.
    train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
with tf.name_scope("init_load_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    # Restoring after init overwrites the fresh values with the saved ones.
    if load_ckpt_path:
        saver.restore(sess, load_ckpt_path)
    for i in range(1000):
        x, y, xy_sum = produce_batch(256)
        _, global_step_np, loss_np, lr_np = sess.run([train_step, global_step, loss, lr], feed_dict={xs: x, ys: y, xysums: xy_sum})
        if global_step_np % 100 == 0:
            print("global step: {}, loss: {}, learning rate: {}".format(global_step_np, loss_np, lr_np))
    # Save once, after the training loop has finished.
    saver.save(sess, save_ckpt_path)
If you run it a few times, you will see the learning rate decrease. It also saves the global step. The trick is here:
# Excerpt: the learning rate is a function of the checkpointed global_step.
with tf.name_scope("step_and_learning_rate"):
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(0.15, global_step, 10, 0.96)  # start lr=0.15, decay every 10 steps with a base of 0.96
...
# Passing global_step to minimize() increments it on every training step.
train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
By default, saver.save will save all saveable objects (including the global step, from which the decayed learning rate is recomputed). However, if tf.train.Saver is provided with var_list, saver.save will only save the vars included in var_list:
# Placeholder: replace [...] with the list of variables to save.
saver = tf.train.Saver(var_list=[...])
sources:
https://www.tensorflow.org/api_docs/python/tf/train/exponential_decay
https://stats.stackexchange.com/questions/200063/tensorflow-adam-optimizer-with-exponential-decay
https://www.tensorflow.org/api_docs/python/tf/train/Saver (see "saveable objects")