Weights and costs unchanged when training in tensorflow - tensorflow

I'm a total rookie and tried to use tensorflow for solving multi-input and multi-output problem. However, during the process of training, the weights and the cost of the network are unchanged. Here's some main code, any suggestion would be appreciated!
learning_rate = 0.01
training_epoch = 2000
batch_size = 100
display_step = 1
# place holder for graph input
x = tf.placeholder("float64", [None, 14])
y = tf.placeholder("float64", [None, 8])
# model weights
w_1 = tf.Variable(tf.zeros([14, 11], dtype = tf.float64))
w_2 = tf.Variable(tf.zeros([11, 8], dtype = tf.float64))
# construct a model
h_in = tf.matmul(x, w_1)
h_out = tf.nn.relu(h_in)
o_in = tf.matmul(h_out, w_2)
o_out = tf.nn.relu(o_in)
# cost: mean square error
cost = tf.reduce_sum(tf.pow((o_out - y), 2))
# optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# initializer
init = tf.global_variables_initializer()
# launch the graph
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epoch):
pos = 0;
# loop over all batches
if pos < train_input_array.shape[0]:
# get the next batch
batch_i = []
batch_o = []
for i in range(pos, pos + batch_size):
batch_i.append(train_input_array[i].tolist())
batch_o.append(train_output_array[i].tolist())
np.array(batch_i)
np.array(batch_o)
pos += batch_size;
sess.run(optimizer, feed_dict = {x: batch_i, y: batch_o})
print sess.run(w_2[0])
if (epoch + 1) % display_step == 0:
c = sess.run(cost, feed_dict = {x: batch_i, y: batch_o})
print("Epoch: ", "%04d" % (epoch + 1), "cost: ", "{:.9f}".format(c))

I think you need to change your cost function, to reduce_mean
# reduce sum doesn't work
cost = tf.reduce_sum(tf.pow((o_out - y), 2))
# you need to use mean
cost = tf.reduce_mean(tf.pow((o_out - y), 2))

Related

Tensor flow sigmoid function not working

I am new to tensor flow and Neural Networks.
In the code below,everything works fine.
However as soon as I implement the tf.sigmoid function by uncommenting #X21 = tf.sigmoid(X21); I get weird results where in all my predictions are equal to 1. Any reason why this might be happening?
Note that I am predicting house prices which are in thousands.
# Set model weights
b1_1 = tf.cast(tf.Variable(np.random.randn(), name="bias"),tf.float64)
b1_2 = tf.cast(tf.Variable(np.random.randn(), name="bias"),tf.float64)
b2_1 = tf.cast(tf.Variable(np.random.randn(), name="bias"),tf.float64)
W1_1 = tf.cast(tf.Variable(np.random.randn(train_X.shape[1], 1), name="bias"),tf.float64)
W1_2 = tf.cast(tf.Variable(np.random.randn(train_X.shape[1], 1), name="bias"),tf.float64)
W2_1 = tf.cast(tf.Variable(np.random.randn(2, 1), name="bias"),tf.float64)
X11 = tf.add(tf.matmul(train_X,W1_1),b1_1)
X12 = tf.add(tf.matmul(train_X,W1_2),b1_2)
X21 = tf.add(tf.matmul(tf.squeeze(tf.transpose(tf.stack((X11,X12)))),W2_1),b2_1)
#X21 = tf.sigmoid(X21)
# placeholders for a tensor that will be always fed.
Y = tf.placeholder('float64', shape = [47, 1])
cost = (tf.reduce_sum(tf.pow(X21-Y, 2))/(2*n_samples))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
# Run the initializer
sess.run(init)
# Fit all training data
for epoch in range(training_epochs):
#for (x, y) in zip(train_X, train_Y):
sess.run(optimizer, feed_dict={Y: train_Y})
# Display logs per epoch step
if (epoch+1) % display_step == 0:
c = sess.run(cost, feed_dict={Y: train_Y})
print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c))
print("Optimization Finished!")
training_cost = sess.run(cost, feed_dict={Y: train_Y})
print("Training cost=", training_cost)
line = sess.run(X21,feed_dict={Y: train_Y})

Training some variables in a restored TensorFlow model

I have a TensorFlow (TF) model that I'd like to restore and retrain some of its parameters. I know that tf.get_operation_by_name('name of the optimizer') retrieves the original optimizer that was used to train the model before it was stored. However, I don't know how to pass the new list of TF variables that I want the optimizer to retrain!
This example helps illustrate what I want to do:
learning_rate = 0.0001
training_iters = 60000
batch_size = 64
display_step = 20
ImVecDim = 784# The number of elements in a an image vector (flattening a 28x28 2D image)
NumOfClasses = 10
dropout = 0.8
with tf.Session() as sess:
LoadMod = tf.train.import_meta_graph('simple_mnist.ckpt.meta') # This object loads the model
LoadMod.restore(sess, tf.train.latest_checkpoint('./')) # Loading weights and biases and other stuff to the model
g = tf.get_default_graph()
# Variables to be retrained:
wc2 = g.get_tensor_by_name('wc2:0')
bc2 = g.get_tensor_by_name('bc2:0')
wc3 = g.get_tensor_by_name('wc3:0')
bc3 = g.get_tensor_by_name('bc3:0')
wd1 = g.get_tensor_by_name('wd1:0')
bd1 = g.get_tensor_by_name('bd1:0')
wd2 = g.get_tensor_by_name('wd2:0')
bd2 = g.get_tensor_by_name('bd2:0')
out_w = g.get_tensor_by_name('out_w:0')
out_b = g.get_tensor_by_name('out_b:0')
VarToTrain = [wc2,wc3,wd1,wd2,out_w,bc2,bc3,bd1,bd2,out_b]
# Retrieving the optimizer:
Opt = tf.get_operation_by_name('Adam')
# Retraining:
X = g.get_tensor_by_name('ImageIn:0')
Y = g.get_tensor_by_name('LabelIn:0')
KP = g.get_tensor_by_name('KeepProb:0')
accuracy = g.get_tensor_by_name('NetAccuracy:0')
cost = g.get_tensor_by_name('loss:0')
step = 1
while step * batch_size < training_iters:
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
#########################################################################
# Here I want to pass (VarToTrain) to the optimizer (Opt)! #
#########################################################################
if step % display_step == 0:
acc = sess.run(accuracy, feed_dict={X: batch_xs, Y: batch_ys, KP: 1.})
loss = sess.run(cost, feed_dict={X: batch_xs, Y: batch_ys, KP: 1.})
print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + "{:.6f}".format(
loss) + ", Training Accuracy= " + "{:.5f}".format(acc))
step += 1
feed_dict = {X: mnist.test.images[:256], Y: mnist.test.labels[:256], KP: 1.0}
ModelAccuracy = sess.run(accuracy, feed_dict)
print('Retraining finished'+', Test Accuracy = %f' %ModelAccuracy)
Well, I have not figured out a way to do what I want exactly, but I've found a way around the problem; instead of passing a new list of variables to the original optimizer, I defined a new optimizer with those variables passed to its minimize() method. The code is given below:
learning_rate = 0.0001
training_iters = 60000
batch_size = 64
display_step = 20
ImVecDim = 784# The number of elements in a an image vector (flattening a 28x28 2D image)
NumOfClasses = 10
dropout = 0.8
with tf.Session() as sess:
LoadMod = tf.train.import_meta_graph('simple_mnist.ckpt.meta') # This object loads the model
LoadMod.restore(sess, tf.train.latest_checkpoint('./')) # Loading weights and biases and other stuff to the model
g = tf.get_default_graph()
# Retraining:
X = g.get_tensor_by_name('ImageIn:0')
Y = g.get_tensor_by_name('LabelIn:0')
KP = g.get_tensor_by_name('KeepProb:0')
accuracy = g.get_tensor_by_name('NetAccuracy:0')
cost = g.get_tensor_by_name('loss:0')
######################## Producing a list and defining a new optimizer ####################################
VarToTrain = g.get_collection_ref('trainable__variables')
del VarToTrain[0] # Removing a variable from the list
del VarToTrain[5] # Removing another variable from the list
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).\
minimize(cost,var_list= VarToTrain)
##########################################################################################
step = 1
while step * batch_size < training_iters:
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys, KP: dropout})
if step % display_step == 0:
acc = sess.run(accuracy, feed_dict={X: batch_xs, Y: batch_ys, KP: 1.})
loss = sess.run(cost, feed_dict={X: batch_xs, Y: batch_ys, KP: 1.})
print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + "{:.6f}".format(
loss) + ", Training Accuracy= " + "{:.5f}".format(acc))
step += 1
feed_dict = {X: mnist.test.images[:256], Y: mnist.test.labels[:256], KP: 1.0}
ModelAccuracy = sess.run(accuracy, feed_dict)
print('Retraining finished'+', Test Accuracy = %f' %ModelAccuracy)
The code above did the job, but it has some issues! First, for some reason, I keep getting error messages every time I define a similar optimizer to the original one, tf.train.AdamOtimizer(). The only optimizer that I can define without TF throwing me error messages is the tf.train.GradientDescentOptimizer(). The other issue in this solution is its inconvenience; in order to produce a list of the variables I want to train, I first have to produce a list of all trainable variables using VarToTrain = g.get_collection_ref('trainable_variables'), print them out, memorize the location of the unwanted variables in the list, then, delete them one by one using del method!! There must be a more elegant way to doing that. What I have done works fine only for small networks!!

ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables

I am training the "Show and tell" model using tensorflow in which the model automatically generates the captions of the images. How ever I am getting this error.
This is the traceback:
------------------------------------------------------------------------
---
ValueError Traceback (most recent call
last)
<ipython-input-36-b6da0a27b701> in <module>()
1 try:
2 #train(.001,False,False) #train from scratch
----> 3 train(.001,True,True) #continue training from pretrained weights #epoch500
4 #train(.001) #train from previously saved weights
5 except KeyboardInterrupt:
ipython-input-35-39693d0edd0a> in train(learning_rate, continue_training, transfer)
31 learning_rate = tf.train.exponential_decay(learning_rate, global_step,
32 int(len(index)/batch_size), 0.95)
---> 33 train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
34 tf.global_variables_initializer().run()
35
/home/niraj/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/optimizer.pyc in minimize(self, loss, global_step, var_list, gate_gradients, aggregation_method, colocate_gradients_with_ops, name, grad_loss)
320 "No gradients provided for any variable, check your graph for ops"
321 " that do not support gradients, between variables %s
and loss %s." %
--> 322 ([str(v) for _, v in grads_and_vars], loss))
323
324 return self.apply_gradients(grads_and_vars,
global_step=global_step,
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["tf.Variable 'word_embedding:0' shape=(2943, 256) dtype=float32_ref>", "tf.Variable 'embedding_bias:0' shape=(256,) dtype=float32_ref>", "tf.Variable 'img_embedding:0' shape=(4096, 256) dtype=float32_ref>", "tf.Variable 'img_embedding_bias:0' shape=(256,) dtype=float32_ref>", "tf.Variable 'word_encoding:0' shape=(256, 2943) dtype=float32_ref>", "tf.Variable 'word_encoding_bias:0' shape=(2943,) dtype=float32_ref>"] and loss Tensor("RNN/div:0", shape=(), dtype=float32).
I know that the error is due to the fact that there is a variable which doesen't holds the gradient during optimisation which in turn is cutting the graph but I am unable to pick it out.
I am using already trained VGG-net 16 model parameters and the FLICKR-30 image dataset having corresponding annotations.
This is the code:
def get_data(annotation_path, feature_path):
annotations = pd.read_table(annotation_path, sep='\t', header=None, names=['image', 'caption'])
return np.load(feature_path,'r'), annotations['caption'].values
def preProBuildWordVocab(sentence_iterator, word_count_threshold=30): # function from Andre Karpathy's NeuralTalk
print('preprocessing %d word vocab' % (word_count_threshold, ))
word_counts = {}
nsents = 0
for sent in sentence_iterator:
nsents += 1
for w in sent.lower().split(' '):
word_counts[w] = word_counts.get(w, 0) + 1
vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))
ixtoword = {}
ixtoword[0] = '.'
wordtoix = {}
wordtoix['#START#'] = 0
ix = 1
for w in vocab:
wordtoix[w] = ix
ixtoword[ix] = w
ix += 1
word_counts['.'] = nsents
bias_init_vector = np.array([1.0*word_counts[ixtoword[i]] for i in ixtoword])
bias_init_vector /= np.sum(bias_init_vector)
bias_init_vector = np.log(bias_init_vector)
bias_init_vector -= np.max(bias_init_vector)
return wordtoix, ixtoword, bias_init_vector.astype(np.float32)
class Caption_Generator():
def __init__(self, dim_in, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, init_b):
self.dim_in = dim_in
self.dim_embed = dim_embed
self.dim_hidden = dim_hidden
self.batch_size = batch_size
self.n_lstm_steps = n_lstm_steps
self.n_words = n_words
# declare the variables to be used for our word embeddings
with tf.device("/cpu:0"):
self.word_embedding = tf.Variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='word_embedding')
self.embedding_bias = tf.Variable(tf.zeros([dim_embed]), name='embedding_bias')
# declare the LSTM itself
self.lstm = tf.contrib.rnn.BasicLSTMCell(dim_hidden)
# declare the variables to be used to embed the image feature embedding to the word embedding space
self.img_embedding = tf.Variable(tf.random_uniform([dim_in, dim_hidden], -0.1, 0.1), name='img_embedding')
self.img_embedding_bias = tf.Variable(tf.zeros([dim_hidden]), name='img_embedding_bias')
# declare the variables to go from an LSTM output to a word encoding output
self.word_encoding = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='word_encoding')
# initialize this bias variable from the preProBuildWordVocab output
self.word_encoding_bias = tf.Variable(init_b, name='word_encoding_bias')
def build_model(self):
# declaring the placeholders for our extracted image feature vectors, our caption, and our mask
# (describes how long our caption is with an array of 0/1 values of length `maxlen`
img = tf.placeholder(tf.float32, [self.batch_size, self.dim_in])
caption_placeholder = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
# getting an initial LSTM embedding from our image_imbedding
image_embedding = tf.matmul(img, self.img_embedding) + self.img_embedding_bias
# setting initial state of our LSTM
state = self.lstm.zero_state(self.batch_size, dtype=tf.float32)
total_loss = 0.0
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i > 0:
#if this isn’t the first iteration of our LSTM we need to get the word_embedding corresponding
# to the (i-1)th word in our caption
with tf.device("/cpu:0"):
current_embedding = tf.nn.embedding_lookup(self.word_embedding, caption_placeholder[:,i-1]) + self.embedding_bias
else:
#if this is the first iteration of our LSTM we utilize the embedded image as our input
current_embedding = image_embedding
if i > 0:
# allows us to reuse the LSTM tensor variable on each iteration
tf.get_variable_scope().reuse_variables()
out, state = self.lstm(current_embedding, state)
#out, state = self.tf.nn.dynamic_rnn(current_embedding, state)
if i > 0:
#get the one-hot representation of the next word in our caption
labels = tf.expand_dims(caption_placeholder[:, i], 1)
ix_range=tf.range(0, self.batch_size, 1)
ixs = tf.expand_dims(ix_range, 1)
concat = tf.concat([ixs, labels],1)
onehot = tf.sparse_to_dense(
concat, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)
#perform a softmax classification to generate the next word in the caption
logit = tf.matmul(out, self.word_encoding) + self.word_encoding_bias
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=onehot)
xentropy = xentropy * mask[:,i]
loss = tf.reduce_sum(xentropy)
total_loss += loss
total_loss = total_loss / tf.reduce_sum(mask[:,1:])
return total_loss, img, caption_placeholder, mask
### Parameters ###
dim_embed = 256
dim_hidden = 256
dim_in = 4096
batch_size = 128
momentum = 0.9
n_epochs = 150
def train(learning_rate=0.001, continue_training=False, transfer=True):
tf.reset_default_graph()
feats, captions = get_data(annotation_path, feature_path)
wordtoix, ixtoword, init_b = preProBuildWordVocab(captions)
np.save('data/ixtoword', ixtoword)
index = (np.arange(len(feats)).astype(int))
np.random.shuffle(index)
sess = tf.InteractiveSession()
n_words = len(wordtoix)
maxlen = np.max( [x for x in map(lambda x: len(x.split(' ')), captions) ] )
caption_generator = Caption_Generator(dim_in, dim_hidden, dim_embed, batch_size, maxlen+2, n_words, init_b)
loss, image, sentence, mask = caption_generator.build_model()
saver = tf.train.Saver(max_to_keep=100)
global_step=tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(learning_rate, global_step,
int(len(index)/batch_size), 0.95)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
tf.global_variables_initializer().run()
if continue_training:
if not transfer:
saver.restore(sess,tf.train.latest_checkpoint(model_path))
else:
saver.restore(sess,tf.train.latest_checkpoint(model_path_transfer))
losses=[]
for epoch in range(n_epochs):
for start, end in zip( range(0, len(index), batch_size), range(batch_size, len(index), batch_size)):
current_feats = feats[index[start:end]]
current_captions = captions[index[start:end]]
current_caption_ind = [x for x in map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)]
current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=maxlen+1)
current_caption_matrix = np.hstack( [np.full( (len(current_caption_matrix),1), 0), current_caption_matrix] )
current_mask_matrix = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
nonzeros = np.array([x for x in map(lambda x: (x != 0).sum()+2, current_caption_matrix )])
for ind, row in enumerate(current_mask_matrix):
row[:nonzeros[ind]] = 1
_, loss_value = sess.run([train_op, loss], feed_dict={
image: current_feats.astype(np.float32),
sentence : current_caption_matrix.astype(np.int32),
mask : current_mask_matrix.astype(np.float32)
})
print("Current Cost: ", loss_value, "\t Epoch {}/{}".format(epoch, n_epochs), "\t Iter {}/{}".format(start,len(feats)))
print("Saving the model from epoch: ", epoch)
saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
Branching in the loss building routine is invalid.
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i > 0:
[...]
else:
[...]
if i > 0:
[...]
if i > 0:
[...]
Note, that last two ifs will never run, as they are in the else clause, meaning that i <= 0. Consequently your loss is actually a constant, equal 0, and thus TF do not see how to optimise it wrt. variables.

Poker Hand dataset in Tensor flow accuracy very bad

I am trying to train a neural network for Poker Hand Dataset (10 classes). I have tried to change mnist exampe to fit for this. However, for my program, the accuracy is always about 50%, that is so bothersome. How can I improve the accuracy?
def init_weights(shape):
""" Weight initialization """
weights = tf.random_normal(shape, stddev=0.1)
return tf.Variable(weights)
def forwardprop(X, weights, biases):
"""
Forward-propagation.
IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.
"""
h = tf.nn.sigmoid(tf.add(tf.matmul(X, weights['w_1']),biases['b_1'])) # The \sigma function
yhat = tf.add(tf.matmul(h, weights['w_2']),biases['b_2']) # The \varphi function
return yhat
def get_data(filename, targetname="target", idname="", test_size=0.10, random_state=200):
#read data from csv
df = pd.read_csv(filename)
data = pd.DataFrame(df.ix[:, df.columns != targetname])
if(idname != str("")):
df = df.drop(idname, 1)
data = pd.DataFrame(df.ix[:, df.columns != targetname])
data = pd.get_dummies(data)
all_X = data.as_matrix()
target = df[targetname]
target = pd.factorize(target)[0]
# Convert target into one-hot vectors
num_labels = len(np.unique(target))
all_Y = np.eye(num_labels)[target] # One liner trick!
return train_test_split(all_X, all_Y, test_size=test_size, random_state=random_state)
def main():
start_time = time.time()
train_X, test_X, train_y, test_y = get_data(filename = './data/poker-train.csv', targetname = "class")
#customized for this dataset (or any large dataset), must be chosen as per the data, need to find some generic way
#for small datasets: batch size can be 1 (for more accuracy),
#for large ones: somewhr around 50-80, if taken 1 very slow,50-80 is a trade off of accuracy for time
learning_rate = 0.01
training_epochs = 100
batch_size = 1
# Layer's sizes
x_size = train_X.shape[1] # Number of input nodes
h_size = train_X.shape[1] # Number of hidden nodes
y_size = train_y.shape[1] # Number of outcomes
# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
# Weight initializations
weights = {
'w_1' : init_weights((x_size, h_size)),
'w_2' : init_weights((h_size, y_size))
}
# Bias initializations
biases = {
'b_1': init_weights([h_size]),
'b_2': init_weights([y_size])
}
# Forward propagation
yhat = forwardprop(X, weights, biases)
predict = tf.argmax(yhat, axis=1)
# Backward propagation
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=yhat))
updates = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Run SGD
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
total_batch = int(train_X.shape[0]/batch_size)
# Launch the graph
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
beg_i=0
# Loop over all batches
for i in range(total_batch):
end_i = beg_i + batch_size
if(end_i > train_X.shape[0]):
end_i = train_X.shape[0]
batch_x, batch_y = train_X[beg_i:end_i,:],train_y[beg_i:end_i,:]
beg_i = beg_i + batch_size
sess.run(updates, feed_dict={X: batch_x, y: batch_y})
train_accuracy = np.mean(np.argmax(train_y, axis=1) == sess.run(predict, feed_dict={X: train_X, y: train_y}))
test_accuracy = np.mean(np.argmax(test_y, axis=1) == sess.run(predict, feed_dict={X: test_X, y: test_y}))
print("Epoch = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
% (epoch + 1, 100. * train_accuracy, 100. * test_accuracy))
# # Test model
# correct_prediction = tf.equal(tf.argmax(predict, 1), tf.argmax(y, 1))
# # Calculate accuracy
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# print( "Accuracy:", accuracy.eval({X: test_X, y: test_y}))
print("Total time of execution: ",time.time()-start_time)
if __name__ == '__main__':
main()
Output is
Epoch = 100, train accuracy = 55.77%, test accuracy = 55.30%
Epoch = 1, train accuracy = 50.13%, test accuracy = 50.20%
batch_size = 50#1
training_epochs = int(train_X.shape[0]/batch_size)
# Layer's sizes
x_size = train_X.shape[1] # Number of input nodes
h_size = 100#train_X.shape[1] # Number of hidden nodes
y_size = train_y.shape[1] # Number of outcomes
I modify above.
Epoch = 1, train accuracy = 49.98%, test accuracy = 50.11%
Epoch = 500, train accuracy = 90.90%, test accuracy = 90.78%

How to write denoising autoencoder as RNN with tensorflow

I want to adapt this Recurrent Neural Network in Tensorflow (from this tutorial
https://github.com/aymericdamien/TensorFlow-Examples/
and then the RNN program)
), so that it will be a denoising autoencoder.
I have 5 time steps, and at each time, the noiseless target is sampled from sin(x), and the noisy input is sin(x)+ Gaussian error.
Now my problem is that the RNN from the example gives me 1 output value for each sequence of inputs, but I want an output for each time step ( I want 5 outputs, not 1)
How do I do this? I suspect it may be a matter of redefining the weights and biases, but how?
Here is the code. Many thanks for your help,
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
# Parameters
learning_rate = 0.0005
training_iters = 1000
batch_size = 3
display_step = 100
# Network Parameters
n_input = 2
n_output = 2
n_steps = 5 # timesteps
n_hidden = 40 # hidden layer num of features
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_input])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
biases = {
'out': tf.Variable(tf.random_normal([ n_output]))
}
# length of time series to be sampled
N = 1000000
dim_input = 2
x1 = np.zeros(N)
x2 = np.zeros(N)
y1 = np.zeros(N)
y2 = np.zeros(N)
# generate data
for i in range(0,N):
# clean
y1[i] = np.math.sin(i)
y2[i] = np.math.cos(i)
# noisy
x1[i] = y1[i]+np.random.normal(loc=0.0, scale=0.05)
x2[i] = y2[i]+np.random.normal(loc=0.0, scale=0.05)
def next_batch():
batch = np.empty([batch_size,n_steps,dim_input])
batch_y = np.empty([batch_size,n_steps,dim_input])
# for plotting purposes only
inits = np.empty([batch_size], dtype=int)
for b in range(0,batch_size):
# the first one of the batch
inits[b] = int(np.round(np.random.uniform(low=0,high=N-n_steps- 1)))
init = inits[b]
for i in range(0,n_steps):
# noisy input
batch[b,i,0] = x1[init + i]
batch[b,i,1] = x2[init + i]
# target (no noise)"
batch_y[b,i,0] = y1[init+i]
batch_y[b,i,1] = y2[init+i]
return(batch,batch_y,inits)
def RNN(x, weights, biases):
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
# SSE, there must be an easier way to do this
def get_cost(prediction,truth):
z = 0
for i in range(0,batch_size):
z = z + np.square(np.add(prediction[i,:], np.multiply(-1,truth[i,:])))
z = np.add(z[0],z[1])
z = np.sum(z)
return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).
minimize(cost)
# Evaluate model
accuracy = cost
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
print('step '+ str(step))
batch_x, batch_y, inits = next_batch()
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print(loss)
step += 1
print("Optimization Finished!")
If I run this, I get this error message:
ValueError: Shape (?, 5, 2) must have rank 2. This seems fair enough, because the target is 5 steps long, and the output only 1. But how do I fix that?
Many thanks.
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
import matplotlib.pyplot as plt
## Denoising autoencoder.
import numpy as np
count = 0
# length of time series to be sampled
N = 10000
x1 = np.zeros(N)
x2 = np.zeros(N)
y1 = np.zeros(N)
y2 = np.zeros(N)
batch_size = 30
learning_rate = 0.0005
training_iters = 300000
display_step = 100
# Network Parameters
n_input = 2
n_output = 2
n_steps = 15 # timesteps
n_hidden = 75 # hidden layer num of
# generate data
for i in range(0,N):
# clean
y1[i] = np.math.sin(i)
y2[i] = np.math.cos(i)
# noisy
x1[i] = y1[i]+np.random.normal(loc=0.0, scale=0.1)
x2[i] = y2[i]+np.random.normal(loc=0.0, scale=0.1)
def next_batch():
batch = np.empty([batch_size,n_steps,n_input])
batch_y = np.empty([batch_size,n_steps,n_input])
# for plotting purposes only
inits = np.empty([batch_size], dtype=int)
for b in range(0,batch_size):
# the first one of the batch
inits[b] = int(np.round(np.random.uniform(low=0,high=N-n_steps-1)))
init = inits[b]
for i in range(0,n_steps):
# noisy input
batch[b,i,0] = x1[init + i]
batch[b,i,1] = x2[init + i]
# target (no noise)"
batch_y[b,i,0] = y1[init+i]
batch_y[b,i,1] = y2[init+i]
return(batch,batch_y,inits)
# Parameters
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_output])
N_train = N - 500
def RNN(x):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.LSTMCell(num_units = n_hidden, forget_bias=1.0, num_proj=2)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
return outputs
print(x)
pred = RNN(x)
# Define loss and optimizer
def get_cost(prediction,truth):
#print('pred' + str(prediction))
# SSE. there must be an easier way than this:
z = 0
for step in range(0,n_steps):
for b in range(0,batch_size):
for y_dim in range(0,2):
d1 = prediction[step][b,y_dim]
d2 = truth[b,step,y_dim]
diff= (d1 - d2 )
z = z + diff * diff
return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
#print('step '+ str(step))
batch_x, batch_y, inits = next_batch()
# Reshape data to get 28 seq of 28 elements
#batch_x = batch_x.reshape((batch_size, n_steps, n_input))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print(str(step) + ':' + str(loss))
step += 1
print("Optimization Finished!")
batch_size = 1
test_data, test_label, inits = next_batch()
#print "Testing Accuracy:", \
#sess.run(accuracy, feed_dict={x: test_data, y: test_label})
p2 = sess.run(pred, feed_dict={x: test_data, y: test_label})
#print('---inits---')
#print(inits)
print('---batch---')
print(test_data)
print('---truth---')
print(test_label)
print('---pred---')
print(p2)
c_final = get_cost(p2, test_label)
print(c_final)
First, we generate some data: a 2-dimensional series of sin(i) and cos(i), with i running from 1 to N. This gives us the variable y. Then we add some Normal noise to this series, and that's x. Then, we train a Recurrent Neural Net to create the clean output from the noisy input. In other words, we train the net such that it will output [cos(i),sin(i)] from input [cos(i)+e1,sin(i)+e2) ]. This is a plain vanilla denoising autoencoder, except that the data has a time element. Now you can feed new data into the neural net, and it will hopefully remove the noise.