TensorFlow: loss jumps up after restoring RNN net - tensorflow

Environment info
Operating System: Windows 7 64-bit
Tensorflow installed from pre-built pip (no CUDA): 1.0.1
Python 3.5.2 64-bit
Problem
I have problems with restoring my net (RNN character base language model). Below is a simplified version with the same problem.
When I run it the first time, I get, for example, this.
...
step 160: loss = 1.956 (perplexity = 7.069016620211226)
step 180: loss = 1.837 (perplexity = 6.274748642468816)
step 200: loss = 1.825 (perplexity = 6.202084762557817)
But on the second run, after restoring parameters, I get this.
step 220: loss = 2.346 (perplexity = 10.446611983898903)
step 240: loss = 2.346 (perplexity = 10.446709120339545)
...
All the tf variables seem to be correctly restored, including the state, which will be fed to RNN.
Data position is also restored (from 'step').
I also made a similar program for MNIST recognition model, and this one works fine: the losses before and after the restoring are continuous.
Are there any other parameters or states that should be saved and restored?
import argparse
import os
import tensorflow as tf
import numpy as np
import math
B = 20 # batch size
H = 200 # size of hidden layer of neurons
T = 25 # number of time steps to unroll the RNN for
data_file = 'ptb.train.txt' # any plain text file will do
checkpoint_dir = "tmp"
#----------------
# prepare data
#----------------
data = open(data_file, 'r').read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has {0} characters, {1} unique.'.format(data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
input_index_raw = np.array([char_to_ix[ch] for ch in data])
input_index_raw = input_index_raw[0:len(input_index_raw) // T * T]
input_index_raw_shift = np.append(input_index_raw[1:], input_index_raw[0])
input_all = input_index_raw.reshape([-1, T])
target_all = input_index_raw_shift.reshape([-1, T])
num_packed_data = len(input_all)
#----------------
# build model
#----------------
class Model(object):
def __init__(self):
self.input_ph = tf.placeholder(tf.int32, [None, T], name="input_ph")
self.target_ph = tf.placeholder(tf.int32, [None, T], name="target_ph")
embedding = tf.get_variable("embedding", [vocab_size, H], initializer=tf.random_normal_initializer(), dtype=tf.float32)
# input_ph is B x T.
# input_embedded is B x T x H.
input_embedded = tf.nn.embedding_lookup(embedding, self.input_ph)
cell = tf.contrib.rnn.BasicRNNCell(H)
self.state_ph = tf.placeholder(tf.float32, (None, cell.state_size), name="state_ph")
# Make state variable so that it will be saved by the saver.
self.state = tf.get_variable("state", (B, cell.state_size), initializer=tf.zeros_initializer(), trainable=False, dtype=tf.float32)
# Construct initial_state according to whether restoring or not.
self.isRestore = tf.placeholder(tf.bool, shape=(), name="isRestore")
zero_state = cell.zero_state(B, dtype=tf.float32)
self.initial_state = tf.cond(self.isRestore, lambda: self.state, lambda: zero_state)
# input_embedded : B x T x H
# output: B x T x H
# state : B x cell.state_size
output, state_ = tf.nn.dynamic_rnn(cell, input_embedded, initial_state=self.state_ph)
self.final_state = tf.assign(self.state, state_)
# reshape to (B * T) x H.
output_flat = tf.reshape(output, [-1, H])
# Convert hidden layer's output to vector of logits for each vocabulary.
softmax_w = tf.get_variable("softmax_w", [H, vocab_size], dtype=tf.float32)
softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
logits = tf.matmul(output_flat, softmax_w) + softmax_b
# cross_entropy is a vector of length B * T
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(self.target_ph, [-1]), logits=logits)
self.loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
self.global_step = tf.get_variable("global_step", (), initializer=tf.zeros_initializer(), trainable=False, dtype=tf.int32)
self.training_op = optimizer.minimize(cross_entropy, global_step=self.global_step)
def train_batch(self, sess, input_batch, target_batch, initial_state):
final_state_, _, final_loss = sess.run([self.final_state, self.training_op, self.loss], feed_dict={self.input_ph: input_batch, self.target_ph: target_batch, self.state_ph: initial_state})
return final_state_, final_loss
# main
with tf.Session() as sess:
if not tf.gfile.Exists(checkpoint_dir):
tf.gfile.MakeDirs(checkpoint_dir)
batch_stride = num_packed_data // B
# make model
model = Model()
saver = tf.train.Saver()
# always initialize
init = tf.global_variables_initializer()
init.run()
# restore if necessary
isRestore = False
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt:
isRestore = True
last_model = ckpt.model_checkpoint_path
print("Loading " + last_model)
saver.restore(sess, last_model)
# set initial step
step = tf.train.global_step(sess, model.global_step) + 1
print("start step = {0}".format(step))
# fetch initial state
state = sess.run(model.initial_state, feed_dict={model.isRestore: isRestore})
print("Initial state: {0}".format(state))
while True:
# prepare batch data
idx = [(step + x * batch_stride) % num_packed_data for x in range(0, B)]
input_batch = input_all[idx]
target_batch = target_all[idx]
state, last_loss = model.train_batch(sess, input_batch, target_batch, state)
if step % 20 == 0:
print('step {0}: loss = {1:.3f} (perplexity = {2})'.format(step, last_loss, math.exp(last_loss)))
if step % 200 == 0:
saved_file = saver.save(sess, os.path.join(checkpoint_dir, "model.ckpt"), global_step=step)
print("Saved to " + saved_file)
print("Last state: {0}".format(model.state.eval()))
break;
step = step + 1

The problem is solved. It had nothing to do with RNN nor TensorFlow.
I changed
chars = list(set(data))
to
chars = sorted(set(data))
and now it works.
This is because python uses a random hash function to build the set, and every time python restarted, 'chars' had a different ordering.

Related

Tensorflow value error: Variable already exists, disallowed

I am predicting financial time series with different time periods using tensorflow. In order to divide input data, I made sub-samples and used for loop.
However, I got an ValueError like this;
ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
Without subsample this code works well.
Below is my code.
import tensorflow as tf
import numpy as np
import matplotlib
import os
import matplotlib.pyplot as plt
class lstm:
def __init__(self, x, y):
# train Parameters
self.seq_length = 50
self.data_dim = x.shape[1]
self.hidden_dim = self.data_dim*2
self.output_dim = 1
self.learning_rate = 0.0001
self.iterations = 5 # originally 500
def model(self,x,y):
# build a dataset
dataX = []
dataY = []
for i in range(0, len(y) - self.seq_length):
_x = x[i:i + self.seq_length]
_y = y[i + self.seq_length]
dataX.append(_x)
dataY.append(_y)
train_size = int(len(dataY) * 0.7977)
test_size = len(dataY) - train_size
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
print(train_size,test_size)
# input place holders
X = tf.placeholder(tf.float32, [None, self.seq_length, self.data_dim])
Y = tf.placeholder(tf.float32, [None, 1])
# build a LSTM network
cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.hidden_dim,state_is_tuple=True, activation=tf.tanh)
outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
self.Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], self.output_dim, activation_fn=None)
# We use the last cell's output
# cost/loss
loss = tf.reduce_sum(tf.square(self.Y_pred - Y)) # sum of the squares
# optimizer
optimizer = tf.train.AdamOptimizer(self.learning_rate)
train = optimizer.minimize(loss)
# RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
# training
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
# Training step
for i in range(self.iterations):
_, step_loss = sess.run([train, loss], feed_dict={X: trainX, Y: trainY})
# prediction
train_predict = sess.run(self.Y_pred, feed_dict={X: trainX})
test_predict = sess.run(self.Y_pred, feed_dict={X: testX})
return train_predict, test_predict
# variables definition
tsx = []
tsy = []
tsr = []
trp = []
tep = []
x = np.loadtxt('data.csv', delimiter=',') # data for analysis
y = x[:,[-1]]
z = np.loadtxt('rb.csv', delimiter=',') # data for time series
z1 = z[:,0] # start cell
z2 = z[:,1] # end cell
for i in range(1): # need to change to len(z)
globals()['x_%s' % i] = x[int(z1[i]):int(z2[i]),:] # definition of x
tsx.append(globals()["x_%s" % i])
globals()['y_%s' % i] = y[int(z1[i])+1:int(z2[i])+1,:] # definition of y
tsy.append(globals()["y_%s" % i])
globals()['a_%s' % i] = lstm(tsx[i],tsy[i]) # definition of class
globals()['trp_%s' % i],globals()['tep_%s' % i] = globals()["a_%s" % i].model(tsx[i],tsy[i])
trp.append(globals()["trp_%s" % i])
tep.append(globals()["tep_%s" % i])
Everytime the model method is called, you are building the computational graph of your LSTM. The second time the model method is called, tensorflow discovers that you already created variables with the same name. If the reuse flag of the scope in which the variables are created, is set to False, a ValueError is raised.
To solve this problem you have to set the reuse flag to True by calling tf.get_variable_scope().reuse_variables() at the end of your loop.
Note that you can't add this in the beginning of your loop, because then you are trying to reuse variables that have not yet been created.
You find more info in the tensorflow docs here
You define some variables in the "model" function.
Try this when you want to call "model" function multiple times:
with tf.variable_scope("model_fn") as scope:
train_predict, test_predict = model(input1)
with tf.variable_scope(scope, reuse=True):
train_predict, test_predict = model(input2)

Tensorflow RNN: Perplexity per Epoch remains constant

I am training an RNN-based language-model using Tensorflow. The model is very similar to the PTB model example in the TF tutorials section. However, when I attempt to train the model on my own data, the perplexity of the model does not go down; it remains constant throughout multiple epochs. Could anyone let me know what I might be doing wrong.
I have a feeling that I am not handling the targets properly, but the gist of my code for the targets is:
def batcher(batch_size,unroll_steps,data,pad):
print(len(data))
batches = len(data) / batch_size
inp = []
target = []
for i in range(batches):
#print(len(data[i*batch_size:(i+1)*batch_size]))
x = data[i*batch_size:(i+1)*batch_size]
y = [ line[1:]+[pad] for line in x ]
yield (x,y)
That is, I just shift the data by 1 and use that as the target for the next word in a sentence.
The training script and model (class) are seen below
Training script (excerpt):
def train(session, model, folder,batch_size,unroll_steps,epoch):
word_to_id, id_to_word, train, val = build_inputs(folder,unroll_steps)
pad = word_to_id['<pad>']
costs = 0
iters = 0
train_size = len(train)
batch_size = model.batch_size
batches = train_size / batch_size
state = session.run(model._initial_state)
print("Running epoch %d" % epoch)
for i in range(batches):
fetches = [model.cost, model._final_state, model.logits]
feed_dict = {}
x = train[i*batch_size:(i+1)*batch_size]
y = [ line[1:] +[pad] for line in x ]
feed_dict[model.input] = x
feed_dict[model.targets] = y
feed_dict[model._initial_state] = state
#print("Cell-state complete - Running")
cost, state, logits = session.run(fetches, feed_dict)
#print("Single Run complete")
costs += cost
iters += model.unroll_steps
print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))
return np.exp(costs/iters)
Model:
import tensorflow as tf
class LM(object):
def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
self.batch_size = batch_size
self.max_gradient = max_gradient
self.layers = layers
self.learning_rate = learning_rate
self.unroll_steps = unroll_steps
self.init = init
#with tf. name_scope("Paramters"):
with tf.device('/gpu:0'), tf.name_scope("Input"):
self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
#self.init = tf.placeholder(tf.float32, shape=[], name="init")
with tf.device('/gpu:0'), tf.name_scope("Embedding"):
embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")
with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse = False) as scope:
lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
if train and prob < 1.0:
lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)
self._initial_state = cell.zero_state(batch_size, tf.float32)
outputs = []
state = self._initial_state
for step in range(unroll_steps):
if step > 0: tf.get_variable_scope().reuse_variables()
(cell_output, state) = cell(embedded_input[:, step, :], state)
outputs.append(cell_output)
with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse = False) as scope:
output = tf.reshape(tf.concat(outputs,1), [-1,size])
softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
logits = tf.matmul(output, softmax_w) + softmax_b
losses = []
for logit, target in zip([logits], [tf.reshape(self.targets,[-1])]):
target = tf.reshape(target, [-1])
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit,labels=target)
losses.append(loss)
self.cost = tf.reduce_sum(losses) / batch_size
self._final_state = state
self.logits = logits
scope.reuse_variables()
if not train:
return
with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
train_variables = tf.trainable_variables()
gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables),self.max_gradient)
optimizer = tf.train.AdamOptimizer(self.learning_rate)
self.training = optimizer.apply_gradients(zip(gradients, train_variables))
tf.get_variable_scope().reuse_variables()

How to implement metrics learning using siamese neural network in Tensorflow

I'm trying to implement metrics learning using Contrastive Loss like in Caffe example and plot results like in example:
(source: researchgate.net)
I tried to use simple fully connected layers in Caffe and it works well (result as on picture above),
but I get different result
Could anyone help me to find issue in my code or suggest how to implement Caffe example in Tensorflow.
Here is my code:
# In[1]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.examples.tutorials.mnist import input_data
from math import sqrt
import numpy as np
from sklearn.manifold import TSNE
get_ipython().magic('matplotlib inline')
get_ipython().magic('pylab inline')
# In[2]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
# In[3]:
learning_rate = 0.00001
training_epochs = 15
batch_size = 100
display_step = 1
logs_path = './tensorflow_logs/mnist_metrics'
# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 28*28 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
margin = 1.0
# In[4]:
x_left = tf.placeholder(tf.float32, shape=[None, n_input], name='InputDataLeft')
x_right = tf.placeholder(tf.float32, shape=[None, n_input], name='InputDataRight')
label = tf.placeholder(tf.float32, shape=[None, 1], name='LabelData') # 0 if the same, 1 is different
x_image_left = x_left
x_image_right = x_right
# In[5]:
# def NN(inputs):
# In[6]:
def tfNN(x, weights, biases):
x = tf.scalar_mul(1.0/256.0, x)
layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
out_layer = tf.add(tf.matmul(layer_3, weights['w4']), biases['b4'])
return out_layer
# In[7]:
# Store layers weight & bias
weights = {
'w1': tf.Variable(tf.random_uniform([n_input, n_hidden_1], minval=-4*np.sqrt(6.0/(n_input + n_hidden_1)), maxval=4*np.sqrt(6.0/(n_input + n_hidden_1))), name='W1'),
'w2': tf.Variable(tf.random_uniform([n_hidden_1, n_hidden_2], minval=-4*np.sqrt(6.0/(n_hidden_1 + n_hidden_2)), maxval=4*np.sqrt(6.0/(n_hidden_1 + n_hidden_2))), name='W2'),
'w3': tf.Variable(tf.random_uniform([n_hidden_2, n_classes], minval=-4*np.sqrt(6.0/(n_hidden_2 + n_classes)), maxval=4*np.sqrt(6.0/(n_hidden_2 + n_classes))), name='W3'),
'w4': tf.Variable(tf.random_uniform([n_classes, 2], minval=-4*np.sqrt(6.0/(n_classes + 2)), maxval=4*np.sqrt(6.0/(n_classes + 2))), name='W4')
}
biases = {
'b1': tf.Variable(tf.truncated_normal([n_hidden_1]) / sqrt(n_hidden_1), name='b1'),
'b2': tf.Variable(tf.truncated_normal([n_hidden_2]) / sqrt(n_hidden_2), name='b2'),
'b3': tf.Variable(tf.truncated_normal([n_classes]) / sqrt(n_classes), name='b3'),
'b4': tf.Variable(tf.truncated_normal([2]) / sqrt(2), name='b4')
}
# In[8]:
with tf.name_scope('Model'):
# Model
pred_left = tfNN(x_image_left, weights, biases)
pred_right = tfNN(x_image_right, weights, biases)
with tf.name_scope('Loss'):
# Minimize error using cross entropy
# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
d = tf.reduce_sum(tf.square(pred_left - pred_right), 1)
d_sqrt = tf.sqrt(d)
loss = label * tf.square(tf.maximum(0.0, margin - d_sqrt)) + (1 - label) * d
loss = 0.5 * tf.reduce_mean(loss)
with tf.name_scope('AdamOptimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# In[9]:
# Initializing the variables
init = tf.global_variables_initializer()
# Create a summary to monitor cost tensor
tf.scalar_summary("loss", loss)
# Merge all summaries into a single op
merged_summary_op = tf.merge_all_summaries()
# In[10]:
# Launch the graph
sess = tf.Session()
sess.run(init)
# op to write logs to Tensorboard
summary_writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
# Training cycle
for epoch in range(training_epochs):
avg_loss = 0.0
total_batch = int(mnist.train.num_examples / batch_size)
# Loop over all batches
for i in range(total_batch):
left_batch_xs, left_batch_ys = mnist.train.next_batch(batch_size)
right_batch_xs, right_batch_ys = mnist.train.next_batch(batch_size)
labels = np.zeros((batch_size, 1))
for l in range(batch_size):
if left_batch_ys[l] == right_batch_ys[l]:
labels[l, 0] = 0.0
else:
labels[l, 0] = 1.0
_, l, summary = sess.run([optimizer, loss, merged_summary_op],
feed_dict = {
x_left: left_batch_xs,
x_right: right_batch_xs,
label: labels,
})
# Write logs at every iteration
summary_writer.add_summary(summary, epoch * total_batch + i)
# Compute average loss
avg_loss += l / total_batch
# Display logs per epoch step
if (epoch+1) % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "loss =", "{:.9f}".format(avg_loss))
print ("Optimization Finished!")
print ("Run the command line:\n" "--> tensorboard --logdir=./tensorflow_logs " "\nThen open http://0.0.0.0:6006/ into your web browser")
# In[11]:
# Test model
# Calculate accuracy
test_xs, test_ys = mnist.train.next_batch(5000)
ans = sess.run([pred_left], feed_dict = { x_left: test_xs})
# In[12]:
ans = ans[0]
# In[13]:
#test_ys
# In[14]:
figure(figsize=(10,10))
# scatter(r[:,0], r[:,1], c=[test_ys[x,:].argmax() for x in range(len(test_ys))])
scatter(ans[:,0], ans[:,1], c=test_ys[:])
I found issue in my Contrastive Loss implementation. It requires set keep_dims=True in distance calculation.
Here is correct:
with tf.name_scope('Loss'):
d = tf.reduce_sum(tf.square(tf.sub(pred_left, pred_right)), 1, keep_dims=True)
d_sqrt = tf.sqrt(d)
loss = label * tf.square(tf.maximum(0.0, margin - d_sqrt)) + (1 - label) * d
loss = 0.5 * tf.reduce_mean(loss)
Now I have correct result:

How to write denoising autoencoder as RNN with tensorflow

I want to adapt this Recurrent Neural Network in Tensorflow (from this tutorial
https://github.com/aymericdamien/TensorFlow-Examples/
and then the RNN program)
), so that it will be a denoising autoencoder.
I have 5 time steps, and at each time, the noiseless target is sampled from sin(x), and the noisy input is sin(x)+ Gaussian error.
Now my problem is that the RNN from the example gives me 1 output value for each sequence of inputs, but I want an output for each time step ( I want 5 outputs, not 1)
How do I do this? I suspect it may be a matter of redefining the weights and biases, but how?
Here is the code. Many thanks for your help,
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
# Parameters
learning_rate = 0.0005
training_iters = 1000
batch_size = 3
display_step = 100
# Network Parameters
n_input = 2
n_output = 2
n_steps = 5 # timesteps
n_hidden = 40 # hidden layer num of features
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_input])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
biases = {
'out': tf.Variable(tf.random_normal([ n_output]))
}
# length of time series to be sampled
N = 1000000
dim_input = 2
x1 = np.zeros(N)
x2 = np.zeros(N)
y1 = np.zeros(N)
y2 = np.zeros(N)
# generate data
for i in range(0,N):
# clean
y1[i] = np.math.sin(i)
y2[i] = np.math.cos(i)
# noisy
x1[i] = y1[i]+np.random.normal(loc=0.0, scale=0.05)
x2[i] = y2[i]+np.random.normal(loc=0.0, scale=0.05)
def next_batch():
batch = np.empty([batch_size,n_steps,dim_input])
batch_y = np.empty([batch_size,n_steps,dim_input])
# for plotting purposes only
inits = np.empty([batch_size], dtype=int)
for b in range(0,batch_size):
# the first one of the batch
inits[b] = int(np.round(np.random.uniform(low=0,high=N-n_steps- 1)))
init = inits[b]
for i in range(0,n_steps):
# noisy input
batch[b,i,0] = x1[init + i]
batch[b,i,1] = x2[init + i]
# target (no noise)"
batch_y[b,i,0] = y1[init+i]
batch_y[b,i,1] = y2[init+i]
return(batch,batch_y,inits)
def RNN(x, weights, biases):
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
# SSE, there must be an easier way to do this
def get_cost(prediction,truth):
z = 0
for i in range(0,batch_size):
z = z + np.square(np.add(prediction[i,:], np.multiply(-1,truth[i,:])))
z = np.add(z[0],z[1])
z = np.sum(z)
return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).
minimize(cost)
# Evaluate model
accuracy = cost
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
print('step '+ str(step))
batch_x, batch_y, inits = next_batch()
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print(loss)
step += 1
print("Optimization Finished!")
If I run this, I get this error message:
ValueError: Shape (?, 5, 2) must have rank 2. This seems fair enough, because the target is 5 steps long, and the output only 1. But how do I fix that?
Many thanks.
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
import matplotlib.pyplot as plt
## Denoising autoencoder.
import numpy as np
count = 0
# length of time series to be sampled
N = 10000
x1 = np.zeros(N)
x2 = np.zeros(N)
y1 = np.zeros(N)
y2 = np.zeros(N)
batch_size = 30
learning_rate = 0.0005
training_iters = 300000
display_step = 100
# Network Parameters
n_input = 2
n_output = 2
n_steps = 15 # timesteps
n_hidden = 75 # hidden layer num of
# generate data
for i in range(0,N):
# clean
y1[i] = np.math.sin(i)
y2[i] = np.math.cos(i)
# noisy
x1[i] = y1[i]+np.random.normal(loc=0.0, scale=0.1)
x2[i] = y2[i]+np.random.normal(loc=0.0, scale=0.1)
def next_batch():
batch = np.empty([batch_size,n_steps,n_input])
batch_y = np.empty([batch_size,n_steps,n_input])
# for plotting purposes only
inits = np.empty([batch_size], dtype=int)
for b in range(0,batch_size):
# the first one of the batch
inits[b] = int(np.round(np.random.uniform(low=0,high=N-n_steps-1)))
init = inits[b]
for i in range(0,n_steps):
# noisy input
batch[b,i,0] = x1[init + i]
batch[b,i,1] = x2[init + i]
# target (no noise)"
batch_y[b,i,0] = y1[init+i]
batch_y[b,i,1] = y2[init+i]
return(batch,batch_y,inits)
# Parameters
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_steps, n_output])
N_train = N - 500
def RNN(x):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.LSTMCell(num_units = n_hidden, forget_bias=1.0, num_proj=2)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
return outputs
print(x)
pred = RNN(x)
# Define loss and optimizer
def get_cost(prediction,truth):
#print('pred' + str(prediction))
# SSE. there must be an easier way than this:
z = 0
for step in range(0,n_steps):
for b in range(0,batch_size):
for y_dim in range(0,2):
d1 = prediction[step][b,y_dim]
d2 = truth[b,step,y_dim]
diff= (d1 - d2 )
z = z + diff * diff
return(z)
cost = get_cost(pred,y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
#print('step '+ str(step))
batch_x, batch_y, inits = next_batch()
# Reshape data to get 28 seq of 28 elements
#batch_x = batch_x.reshape((batch_size, n_steps, n_input))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print(str(step) + ':' + str(loss))
step += 1
print("Optimization Finished!")
batch_size = 1
test_data, test_label, inits = next_batch()
#print "Testing Accuracy:", \
#sess.run(accuracy, feed_dict={x: test_data, y: test_label})
p2 = sess.run(pred, feed_dict={x: test_data, y: test_label})
#print('---inits---')
#print(inits)
print('---batch---')
print(test_data)
print('---truth---')
print(test_label)
print('---pred---')
print(p2)
c_final = get_cost(p2, test_label)
print(c_final)
First, we generate some data: a 2-dimensional series of sin(i) and cos(i), with i running from 1 to N. This gives us the variable y. Then we add some Normal noise to this series, and that's x. Then, we train a Recurrent Neural Net to create the clean output from the noisy input. In other words, we train the net such that it will output [cos(i),sin(i)] from input [cos(i)+e1,sin(i)+e2) ]. This is a plain vanilla denoising autoencoder, except that the data has a time element. Now you can feed new data into the neural net, and it will hopefully remove the noise.

How to generate a sample sentence with LSTM model in Tensorflow?

I'm working with the LSTM model in Tensorflow.
I already trained and saved the LSTM model. Now I'm coming up to the last task to generate the sentences.
Here is my pseudo code:
# We have already the run_epoch(session, m, data, eval_op, verbose=False) function with fee_dict like this:
feed_dict = {m.input_data: x,
m.targets: y,
m.initial_state: state}
...
# train and save model
...
# load saved model for generating task
new_sentence = [START_TOKEN]
# Here I want to generate a sentence until END_TOKEN is generated.
while new_sentence[-1] != END_TOKEN:
logits = get_logits(model, new_sentence)
# get argmax(logits) or sample(logits)
next_word = argmax(logits)
new_sentence.append(next_word)
print(new_sentence)
My question is:
When training, validating, or testing model I have to feed both of the inputs and their labels (by shifted inputs one) into model via feed_dict dictionary. But in the generating task, I have only one input which is the generating sentence new_sentence.
How can I build the right get_logits function or full generate function also?
when you train you have an output of the neural network, based on that output you calculate the error, based on error you create the optimizer to minimize the error.
In order to generate a new sentence you need to get just the output of the neural network(rnn).
Edited:
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unpack(x_one_hot, axis=1)
"""
Definition of rnn_cell
This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
with tf.variable_scope('rnn_cell'):
W = tf.get_variable('W', [num_classes + state_size, state_size])
b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
def rnn_cell(rnn_input, state):
with tf.variable_scope('rnn_cell', reuse=True):
W = tf.get_variable('W', [num_classes + state_size, state_size])
b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
return tf.tanh(tf.matmul(tf.concat(1, [rnn_input, state]), W) + b)
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
state = rnn_cell(rnn_input, state)
rnn_outputs.append(state)
final_state = rnn_outputs[-1]
#logits and predictions
with tf.variable_scope('softmax'):
W = tf.get_variable('W', [state_size, num_classes])
b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
# Turn our y placeholder into a list labels
y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, num_steps, y)]
#losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logit,label) for \
logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
def train():
with tf.Session() as sess:
#load the model
training_losses = []
for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
training_loss = 0
training_state = np.zeros((batch_size, state_size))
if verbose:
print("\nEPOCH", idx)
for step, (X, Y) in enumerate(epoch):
tr_losses, training_loss_, training_state, _ = \
sess.run([losses,
total_loss,
final_state,
train_step],
feed_dict={x:X, y:Y, init_state:training_state})
training_loss += training_loss_
if step % 100 == 0 and step > 0:
if verbose:
print("Average loss at step", step,
"for last 250 steps:", training_loss/100)
training_losses.append(training_loss/100)
training_loss = 0
#save the model
def generate_seq():
with tf.Session() as sess:
#load the model
# load saved model for generating task
new_sentence = [START_TOKEN]
# Here I want to generate a sentence until END_TOKEN is generated.
while new_sentence[-1] != END_TOKEN:
logits = sess.run(final_state,{x:np.asarray([new_sentence])})
# get argmax(logits) or sample(logits)
next_word = argmax(logits[0])
new_sentence.append(next_word)
print(new_sentence)