tensorflow autoencoder cannot learn in training - optimization

In my code, I am trying to practice using the tf.train.batch function. At the sess.run([optimizer]) line, it doesn't return anything and just freezes. Can you please find my mistake?
tensors = tf.convert_to_tensor(x_train, dtype=tf.float32)
tensors = tf.reshape(tensors, shape=x_train.shape)
batch = tf.train.batch([tensors], batch_size=BATCH_SIZE, enqueue_many=True)
# Weights and biases to hidden layer
Wh = tf.Variable(tf.random_normal([COLUMN-2, UNITS_OF_HIDDEN_LAYER], mean=0.0, stddev=0.05))
bh = tf.Variable(tf.zeros([UNITS_OF_HIDDEN_LAYER]))
h = tf.nn.tanh(tf.matmul(batch, Wh) + bh)
# Weights and biases to output layer
Wo = tf.transpose(Wh) # tied weights
bo = tf.Variable(tf.zeros([COLUMN-2]))
y = tf.nn.tanh(tf.matmul(h, Wo) + bo)
# Objective functions
mean_sqr = tf.reduce_mean(tf.pow(batch - y, 2))
optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(mean_sqr)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for j in range(TRAINING_EPOCHS):
    sess.run([optimizer])
    print("optimizer: ")

tf.train.batch is backed by a queue, so you need to start the queue runners in your session with tf.train.start_queue_runners. You can learn more in TensorFlow's Threading and Queues guide.
Make the following changes:
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Training loop
        for j in range(TRAINING_EPOCHS):
            if coord.should_stop():
                break
            sess.run([optimizer])
            print("optimizer: ")
    except Exception as e:
        # When done, ask the threads to stop.
        coord.request_stop(e)
    finally:
        coord.request_stop()
        # Wait for threads to finish.
        coord.join(threads)
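As an aside, the same batching can also be expressed with the tf.data API (TensorFlow 1.4+), which avoids queue runners entirely; this is a minimal sketch, assuming x_train is an in-memory NumPy array:
import numpy as np
import tensorflow as tf

# Build an endlessly repeating, batched pipeline from the in-memory array.
dataset = tf.data.Dataset.from_tensor_slices(x_train.astype(np.float32))
dataset = dataset.repeat().batch(BATCH_SIZE)
batch = dataset.make_one_shot_iterator().get_next()
# 'batch' plugs into the autoencoder graph just like the tf.train.batch output,
# and no tf.train.start_queue_runners call is needed.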

Related

Problem with tensorflow initialization when it gets encapsulated

I am encapsulating an autoencoder cost calculation so that it can be used with swarm algorithms. The goal is to get the autoencoder's cost by sending a few parameters, so the method creates a model, trains it, and returns its cost:
def getAECost(dfnormalized, adamParam, iterations):
    N_VISIBLE = 31
    N_HIDDEN = 20
    DEVICE = '/gpu:0'  # or '/cpu:0'
    ITERATIONS = 1 + iterations

    with tf.device(DEVICE):
        # create node for the input data (None rows and N_VISIBLE columns)
        X = tf.placeholder("float", [None, N_VISIBLE], name='X')
        # create nodes for the hidden variables
        W_init_max = 4 * np.sqrt(6. / (N_VISIBLE + N_HIDDEN))
        W_init = tf.random_uniform(shape=[N_VISIBLE, N_HIDDEN])#,
                                   # minval=-W_init_max,
                                   # maxval=W_init_max)
        # initialize our weights and bias
        # W: [N_VISIBLE, N_HIDDEN]
        W = tf.Variable(W_init, name='W')
        # initialize only the bias of the hidden layer
        b = tf.Variable(tf.zeros([N_HIDDEN]), name='b')
        # W_prime: [N_HIDDEN, N_VISIBLE]
        W_prime = tf.transpose(W)  # tied weights between encoder and decoder
        b_prime = tf.Variable(tf.zeros([N_VISIBLE]), name='b_prime')

        # model that takes our variables as parameters
        # (behaviour of the neural network)
        def model(X, W, b, W_prime, b_prime):
            tilde_X = X
            # encode
            Y = tf.nn.sigmoid(tf.matmul(tilde_X, W) + b)  # hidden state
            # reconstruct the input
            Z = tf.nn.sigmoid(tf.matmul(Y, W_prime) + b_prime)  # reconstructed input
            return Z

        # build model graph
        pred = model(X, W, b, W_prime, b_prime)
        # create cost function: sum of squared errors
        cost = tf.reduce_sum(tf.pow(X - pred, 2))  # minimize squared error
        # placeholder for the learning rate
        learning = tf.placeholder("float", name='learning')
        train_op = tf.train.AdamOptimizer(learning).minimize(cost)  # construct an optimizer

    with tf.Session() as sess:
        # you need to initialize all variables
        tf.global_variables_initializer()
        RATIO = adamParam
        for i in range(ITERATIONS):
            # prepare the input (minibatch) to feed the autoencoder
            input_ = dfnormalized
            # train autoencoder
            sess.run(train_op, feed_dict={X: input_, learning: RATIO})
            # after the last epoch, evaluate and return the cost
            if i == ITERATIONS - 1:
                costAE = sess.run(cost, feed_dict={X: input_})
                return costAE
This worked a few days ago (maybe I had another session running in the background), with the method returning a float, but now it is failing with the initialization error
FailedPreconditionError: Attempting to use uninitialized value W
[[{{node W/read}}]]
in the training step
sess.run(train_op, feed_dict={X: input_, learning: RATIO})
Any advice on how this initialization problem can be solved, or on how to encapsulate a TensorFlow model and session?
Thanks
You have to actually run the variables initializer: tf.global_variables_initializer() returns an op to be executed, it does not run the initialization for you. So the solution should be to replace the line
tf.global_variables_initializer()
with
sess.run(tf.global_variables_initializer())
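Inside getAECost, that means the session block becomes (only the relevant lines shown):
with tf.Session() as sess:
    # actually run the initializer op before any training step
    sess.run(tf.global_variables_initializer())
    RATIO = adamParam
    for i in range(ITERATIONS):
        input_ = dfnormalized
        sess.run(train_op, feed_dict={X: input_, learning: RATIO})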
I tried what @Addy said and restructured the code to make it more legible, and now it works perfectly:
class Model:

    N_VISIBLE = 31
    N_HIDDEN = 20
    DEVICE = '/gpu:0'  # or '/cpu:0'

    with tf.device(DEVICE):
        # create node for the input data (None rows and N_VISIBLE columns)
        X = tf.placeholder("float", [None, N_VISIBLE], name='X')
        # create nodes for the hidden variables
        W_init_max = 4 * np.sqrt(6. / (N_VISIBLE + N_HIDDEN))
        W_init = tf.random_uniform(shape=[N_VISIBLE, N_HIDDEN])#,
                                   # minval=-W_init_max,
                                   # maxval=W_init_max)
        # initialize our weights and bias
        # W: [N_VISIBLE, N_HIDDEN]
        W = tf.Variable(W_init, name='W')
        # initialize only the bias of the hidden layer
        b = tf.Variable(tf.zeros([N_HIDDEN]), name='b')
        # W_prime: [N_HIDDEN, N_VISIBLE]
        W_prime = tf.transpose(W)  # tied weights between encoder and decoder
        b_prime = tf.Variable(tf.zeros([N_VISIBLE]), name='b_prime')

        # model that takes our variables as parameters
        # (behaviour of the neural network)
        def model(X, W, b, W_prime, b_prime):
            tilde_X = X
            # encode
            Y = tf.nn.sigmoid(tf.matmul(tilde_X, W) + b)  # hidden state
            # reconstruct the input
            Z = tf.nn.sigmoid(tf.matmul(Y, W_prime) + b_prime)  # reconstructed input
            return Z

        # build model graph
        pred = model(X, W, b, W_prime, b_prime)
        # create cost function: sum of squared errors
        cost = tf.reduce_sum(tf.pow(X - pred, 2))  # minimize squared error
        # placeholder for the learning rate
        learning = tf.placeholder("float", name='learning')
        train_op = tf.train.AdamOptimizer(learning).minimize(cost)  # construct an optimizer

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    def train(self, data, adamParam, iterations):
        input_ = data
        RATIO = adamParam
        for i in range(iterations):
            # train autoencoder
            _ = self.sess.run(self.train_op, feed_dict={self.X: input_, self.learning: RATIO})
        # print("Model trained")

    def getAECost(self, data):
        input_ = data
        return self.sess.run(self.cost, {self.X: data})

    def trainAndGetCost(self, dataTrain, dataCost, adamParam, iterations):
        self.train(dataTrain, adamParam, iterations)
        return self.getAECost(dataCost)
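For reference, a hypothetical usage of the class (the learning rate and iteration count below are made-up values; dfnormalized is the scaled data from the original question):
# Hypothetical usage of the Model class above.
model = Model()
cost = model.trainAndGetCost(dfnormalized, dfnormalized, adamParam=0.01, iterations=100)
print(cost)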

How to restore a saved BiRNN model in tensorflow so that all output neurons are correctly bundled to the corresponding output classes

I am facing a problem with properly restoring a saved model in TensorFlow. I created a bidirectional RNN model in TensorFlow with the following code:
batchX_placeholder = tf.placeholder(tf.float32, [None, timesteps, 1],
                                    name="batchX_placeholder")
batchY_placeholder = tf.placeholder(tf.float32, [None, num_classes],
                                    name="batchY_placeholder")
weights = tf.Variable(np.random.rand(2*STATE_SIZE, num_classes),
                      dtype=tf.float32, name="weights")
biases = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32,
                     name="biases")
logits = BiRNN(batchX_placeholder, weights, biases)
with tf.name_scope("prediction"):
    prediction = tf.nn.softmax(logits)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=batchY_placeholder))
lr = tf.Variable(learning_rate, trainable=False, dtype=tf.float32,
                 name='lr')
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(loss_op)
init_op = tf.initialize_all_variables()
saver = tf.train.Saver()
The BiRNN architecture is created with the following function:
def BiRNN(x, weights, biases):
    # Unstack to get a list of 'time_steps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, time_steps, 1)
    # Forward and backward direction cells
    lstm_fw_cell = rnn.BasicLSTMCell(STATE_SIZE, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(STATE_SIZE, forget_bias=1.0)
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell,
                                                 lstm_bw_cell, x, dtype=tf.float32)
    # Linear activation, using the last output of the rnn inner loop
    return tf.matmul(outputs[-1], weights) + biases
Then I train the model and save it every 200 steps:
with tf.Session() as sess:
    sess.run(init_op)
    current_step = 0
    for batch_x, batch_y in get_minibatch():
        sess.run(train_op, feed_dict={batchX_placeholder: batch_x,
                                      batchY_placeholder: batch_y})
        current_step += 1
        if current_step % 200 == 0:
            saver.save(sess, os.path.join(model_dir, "model"))
To run the saved model in inference mode, I use the saved TensorFlow graph in the "model.meta" file:
graph = tf.get_default_graph()
saver = tf.train.import_meta_graph(os.path.join(model_dir, "model.meta"))
sess = tf.Session()
saver.restore(sess, tf.train.latest_checkpoint(model_dir))
weights = graph.get_tensor_by_name("weights:0")
biases = graph.get_tensor_by_name("biases:0")
batchX_placeholder = graph.get_tensor_by_name("batchX_placeholder:0")
batchY_placeholder = graph.get_tensor_by_name("batchY_placeholder:0")
logits = BiRNN(batchX_placeholder, weights, biases)
prediction = graph.get_operation_by_name("prediction/Softmax")
argmax_pred = tf.argmax(prediction, 1)
init = tf.global_variables_initializer()
sess.run(init)
for x_seq, y_gt in get_sequence():
    _, y_pred = sess.run([prediction, argmax_pred],
                         feed_dict={batchX_placeholder: [x_seq],
                                    batchY_placeholder: [[0.0, 0.0]]})
    print("Y ground true: " + str(y_gt) + ", Y pred: " + str(y_pred[0]))
And when I run the code in inference mode, I get different results each time I launch it. It seems that the output neurons of the softmax layer are randomly bundled with different output classes.
So my question is: how can I save and then correctly restore the model in TensorFlow, so that all neurons are properly bundled with the corresponding output classes?
There is no need to call tf.global_variables_initializer(); I think that is your problem.
I removed some operations: logits, weights and biases. You don't need to rebuild them, since they are already loaded; use graph.get_tensor_by_name to get them.
For the prediction, get the tensor instead of the operation (see this answer).
This is the code:
graph = tf.get_default_graph()
saver = tf.train.import_meta_graph(os.path.join(model_dir, "model.meta"))
sess = tf.Session()
saver.restore(sess, tf.train.latest_checkpoint(model_dir))
batchX_placeholder = graph.get_tensor_by_name("batchX_placeholder:0")
batchY_placeholder = graph.get_tensor_by_name("batchY_placeholder:0")
prediction = graph.get_tensor_by_name("prediction/Softmax:0")
argmax_pred = tf.argmax(prediction, 1)
Edit 1: I noticed that I wasn't clear on why you got different results.
And when I run the code in inference mode, I get different results
each time I launch it.
Note that although you used the weights from the loaded model, you were creating the BiRNN again, and the BasicLSTMCell also has weights and other variables that you do not set from your loaded model, so they need to be initialized (with new random values), resulting in an untrained model again.
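In other words, the inference loop should rely only on the tensors recovered from the checkpoint, without rebuilding BiRNN; a minimal sketch (get_sequence is the generator from the question):
for x_seq, y_gt in get_sequence():
    # prediction and argmax_pred come from the restored graph above; no feed for batchY is needed.
    y_prob, y_pred = sess.run([prediction, argmax_pred],
                              feed_dict={batchX_placeholder: [x_seq]})
    print("Y ground truth: " + str(y_gt) + ", Y pred: " + str(y_pred[0]))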

loading a trained model back into tensorflow

I have constructed a model in TensorFlow, which I've trained. Now I want to work with the outputs, so I want to load the checkpoint, meta, and all the other files back into TensorFlow.
I have used the following code to train the model:
# Logging
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/train')
test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')
validate_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/validate')
writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
saver = tf.train.Saver()  # for storing the best network

# Initialize variables
init = tf.global_variables_initializer()
sess.run(init)

# Best validation accuracy seen so far
bestValidation = -0.1

coord = tf.train.Coordinator()  # coordinator for threads
threads = tf.train.start_queue_runners(coord=coord, sess=sess)  # start queue thread

# Training loop
for i in range(FLAGS.maxIter):
    xTrain, yTrain = sess.run(data_batch)
    sess.run(train_step, feed_dict={x_data: xTrain, y_target: np.transpose([yTrain])})
    summary = sess.run(merged, feed_dict={x_data: xTrain, y_target: np.transpose([yTrain])})
    train_writer.add_summary(summary, i)
    if (i + 1) % 10 == 0:
        print("Iteration:", i + 1, "/", FLAGS.maxIter)
        summary = sess.run(merged, feed_dict={x_data: dataTest.data,
                                              y_target: np.transpose([dataTest.target])})
        test_writer.add_summary(summary, i)
        currentValidation, summary = sess.run([accuracy, merged],
                                              feed_dict={x_data: dataTest.data,
                                                         y_target: np.transpose([dataTest.target])})
        validate_writer.add_summary(summary, i)
        if currentValidation > bestValidation and currentValidation <= 0.9:
            bestValidation = currentValidation
            saver.save(sess=sess, save_path=FLAGS.summary_dir + '/bestNetwork')
            print("\tbetter network stored,", currentValidation, ">", bestValidation)

coord.request_stop()  # ask threads to stop
coord.join(threads)   # wait for threads to stop
Now I want to load the model back into TensorFlow. I want to be able to do a few things:
Work with the outputs I've already created for the training and test datasets.
Load new data into the model, and then use the same weights to make new outputs.
I've tried using the following code to load the model back into TensorFlow, but it doesn't work:
with tf.Session() as sess:
    saver = tf.train.import_meta_graph(FLAGS.summary_dir + '/bestNetwork.meta')
    saver.restore(sess, tf.train.latest_checkpoint(FLAGS.summary_dir + '/checkpoint'))
I get the following error when running the code:
TypeError: expected bytes, NoneType found
As I understand it, I'm loading the meta graph from the previous section with the tf.train.import_meta_graph() function, and then loading the weights with the checkpoint part. So why is this not working?
You saved the model with the prefix bestNetwork, so tf.train.latest_checkpoint finds nothing under '/checkpoint' and returns None, which is what produces the "expected bytes, NoneType found" error. Try this instead:
saver.restore(sess, tf.train.latest_checkpoint(FLAGS.summary_dir + '/bestNetwork'))
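To then reuse the trained weights on new data (your second point), fetch the tensors you need from the imported graph after restoring. The tensor names below are assumptions, since the training code does not show how the placeholders and outputs were named:
graph = tf.get_default_graph()
# Hypothetical names -- replace them with the names used when the graph was built.
x_data = graph.get_tensor_by_name("x_data:0")    # input placeholder
output = graph.get_tensor_by_name("output:0")    # network output
new_predictions = sess.run(output, feed_dict={x_data: new_data})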

Add a summary of accuracy of the whole train/test dataset in Tensorflow

I am trying to use TensorBoard to visualize my training procedure. My goal is that, when every epoch completes, I test the network's accuracy on the whole validation dataset and store this accuracy result in a summary file, so that I can visualize it in TensorBoard.
I know TensorFlow has summary_op to do this, but it seems to work for only one batch when running sess.run(summary_op). I need to calculate the accuracy over the whole dataset. How?
Is there any example of how to do this?
Define a tf.scalar_summary that accepts a placeholder:
accuracy_value_ = tf.placeholder(tf.float32, shape=())
accuracy_summary = tf.scalar_summary('accuracy', accuracy_value_)
Then calculate the accuracy for the whole dataset (define a routine that computes the accuracy for every batch in the dataset and take the mean of those values) and save it in a Python variable, let's call it va.
Once you have the value of va, just run the accuracy_summary op, feeding the accuracy_value_ placeholder:
sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
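A sketch of that routine end to end (iterate_validation_batches, accuracy, x, y_, validation_writer and global_step are assumed names, not from the question):
import numpy as np

batch_accuracies = []
for batch_x, batch_y in iterate_validation_batches():  # hypothetical batch iterator
    batch_accuracies.append(
        sess.run(accuracy, feed_dict={x: batch_x, y_: batch_y}))
va = float(np.mean(batch_accuracies))  # accuracy over the whole validation set

summary = sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
validation_writer.add_summary(summary, global_step)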
I implemented a naive one-layer model as an example to classify the MNIST dataset and visualize validation accuracy in TensorBoard; it works for me.
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import os

# number of epochs
num_epoch = 1000
model_dir = '/tmp/tf/onelayer_model/accu_info'
# mnist dataset location, change if you need
data_dir = '../data/mnist'
# load MNIST dataset without one hot
dataset = read_data_sets(data_dir, one_hot=False)

# Create placeholders for input images X and labels y
X = tf.placeholder(tf.float32, [None, 784])
# one_hot = False
y = tf.placeholder(tf.int32)

# One layer model graph
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.nn.relu(tf.matmul(X, W) + b)

init = tf.initialize_all_variables()

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)
# loss function
loss = tf.reduce_mean(cross_entropy)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

_, top_1_op = tf.nn.top_k(logits)
top_1 = tf.reshape(top_1_op, shape=[-1])
correct_classification = tf.cast(tf.equal(top_1, y), tf.float32)
# accuracy function
acc = tf.reduce_mean(correct_classification)

# define info that is used in SummaryWriter
acc_summary = tf.scalar_summary('valid_accuracy', acc)
valid_summary_op = tf.merge_summary([acc_summary])

with tf.Session() as sess:
    # initialize all the variables
    sess.run(init)
    print("Writing Summaries to %s" % model_dir)
    train_summary_writer = tf.train.SummaryWriter(model_dir, sess.graph)
    # load validation dataset
    valid_x = dataset.validation.images
    valid_y = dataset.validation.labels
    for epoch in xrange(num_epoch):
        batch_x, batch_y = dataset.train.next_batch(100)
        feed_dict = {X: batch_x, y: batch_y}
        _, acc_value, loss_value = sess.run(
            [train_op, acc, loss], feed_dict=feed_dict)
        vsummary = sess.run(valid_summary_op,
                            feed_dict={X: valid_x,
                                       y: valid_y})
        # Write validation accuracy summary
        train_summary_writer.add_summary(vsummary, epoch)
Batching your validation set is possible if you use the tf.metrics ops, which keep internal counters. Here is a simplified example:
model = create_model()

tf.summary.scalar('cost', model.cost_op)
acc_value_op, acc_update_op = tf.metrics.accuracy(labels, predictions)

summary_common = tf.summary.merge_all()

summary_valid = tf.summary.merge([
    tf.summary.scalar('accuracy', acc_value_op),
    # other metrics here...
])

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(logs_path + '/train',
                                         sess.graph)
    valid_writer = tf.summary.FileWriter(logs_path + '/valid')
While training, only write the common summary using your train-writer:
summary = sess.run(summary_common)
train_writer.add_summary(summary, tf.train.global_step(sess, gstep_op))
train_writer.flush()
After every validation, write both summaries using the valid-writer:
gstep, summaryc, summaryv = sess.run([gstep_op, summary_common, summary_valid])
valid_writer.add_summary(summaryc, gstep)
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()
When using tf.metrics, don't forget to reset the internal counters (local variables) before every validation step.
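A sketch of one validation pass under these assumptions (iterate_validation_batches, features and labels are assumed names; with a dataset- or queue-based input pipeline you would simply run acc_update_op once per batch without a feed_dict):
# Reset the internal counters of tf.metrics (they are stored in local variables).
sess.run(tf.local_variables_initializer())

# Accumulate the metric over every validation batch.
for batch_x, batch_y in iterate_validation_batches():  # hypothetical batch iterator
    sess.run(acc_update_op, feed_dict={features: batch_x, labels: batch_y})

# acc_value_op now reflects the whole validation set, so write the summaries as above.
gstep, summaryv = sess.run([gstep_op, summary_valid])
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()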

initial states remain zero in bidirectional lstm

I define initial states, but when I print them they are still zero. Below is my code:
def BiRNN(x, weights, biases):
    # some x shaping
    lstm_fw_cell = rnn_cell.GRUCell(n_hidden)
    lstm_bw_cell = rnn_cell.GRUCell(n_hidden)
    init_state_fw = lstm_fw_cell.zero_state(batch_size, tf.float32)
    init_state_bw = lstm_bw_cell.zero_state(batch_size, tf.float32)
    outputs, fstate, bstate = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                    dtype=tf.float32,
                                                    initial_state_fw=init_state_fw,
                                                    initial_state_bw=init_state_bw)
    return [tf.matmul(outputs[-1], weights['out']) + biases['out'], init_state_fw]

pred = BiRNN(x, weights, biases)
cost = tf.reduce_mean(tf.pow(pred[0] - y, 2))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.initialize_all_variables()
saver = tf.train.Saver()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reaching max iterations
    while step * batch_size < training_iters:
        # Start populating the filename queue.
        batch_x = example3[step % n_samples]
        batch_y = label3[step % n_samples]
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        batch_y = batch_y.reshape((batch_size, n_steps, n_classes))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        w = sess.run(pred[1])
        print(w)
I want to get the initial states for later use. I have a dataset that requires two different initial states, and I want the initial states to be trained separately.