Simple softmax classifier in tensorflow - tensorflow

So I am trying to write a simple softmax classifier in TensorFlow.
Here is the code:
# Neural network parameters
n_hidden_units = 500
n_classes = 10
# Training-set placeholders: a batch of 2-D samples (batch size left open)
# and one integer class label per sample.
input_X = tf.placeholder(dtype='float32',shape=(None,X_train.shape[1], X_train.shape[2]),name="input_X")
input_y = tf.placeholder(dtype='int32', shape=(None,), name="input_y")
# hidden layer
dim = X_train.shape[1]*X_train.shape[2] # dimension of each training data point
# Flatten every sample into a vector of length `dim` so it can be matrix-multiplied.
flatten_X = tf.reshape(input_X, shape=(-1, dim))
# NOTE(review): every weight and bias below starts at exactly zero, so all
# hidden units are initially identical. Zero initialization is the first thing
# to check when the loss does not move.
weights_hidden_layer = tf.Variable(initial_value=np.zeros((dim,n_hidden_units)), dtype ='float32')
bias_hidden_layer = tf.Variable(initial_value=np.zeros((1,n_hidden_units)), dtype ='float32')
hidden_layer_output = tf.nn.relu(tf.matmul(flatten_X, weights_hidden_layer) + bias_hidden_layer)
# output layer
weights_output_layer = tf.Variable(initial_value=np.zeros((n_hidden_units,n_classes)), dtype ='float32')
bias_output_layer = tf.Variable(initial_value=np.zeros((1,n_classes)), dtype ='float32')
output_logits = tf.matmul(hidden_layer_output, weights_output_layer) + bias_output_layer
# Softmax of the logits; not used by the loss below, which takes the raw logits.
predicted_y = tf.nn.softmax(output_logits)
# loss
# Integer labels are expanded to one-hot rows of width n_classes before the
# softmax cross-entropy is computed from the logits.
one_hot_labels = tf.one_hot(input_y, depth=n_classes, axis = -1)
loss = tf.losses.softmax_cross_entropy(one_hot_labels, output_logits)
# optimizer
# MomentumOptimizer(0.01, 0.5); only the four variables listed in var_list are updated.
optimizer = tf.train.MomentumOptimizer(0.01, 0.5).minimize(
loss, var_list=[weights_hidden_layer, bias_hidden_layer, weights_output_layer, bias_output_layer])
This builds without errors, and I have checked the shapes of all the tensors: they match what I expect.
However, I tried to run the optimizer using the following code:
# running the optimizer
s = tf.InteractiveSession()
s.run(tf.global_variables_initializer())
for i in range(5):
    # One full-batch update step on the whole training set.
    s.run(optimizer, {input_X: X_train, input_y: y_train})
    # Evaluate the loss in a separate run so it reflects the weights *after*
    # the update that was just applied.
    loss_i = s.run(loss, {input_X: X_train, input_y: y_train})
    print("loss at iter %i:%.4f" % (i, loss_i))
And the loss kept being the same in all iterations!
I must have messed up something, but I fail to see what.
Any ideas? I would also appreciate it if somebody left comments regarding code style and/or TensorFlow tips.

The problem is that you are initializing your weights with np.zeros. Use np.random.normal instead. You can choose the standard deviation of this Gaussian distribution based on the number of inputs going into a particular neuron (the mean is normally kept at zero). You can read more about it here.
The reason you want to initialize with a Gaussian distribution is to break symmetry. If all the weights are initialized to zero, then working through backpropagation shows that all the weights receive identical updates and therefore evolve identically.

One could visualize the weight histogram using TensorBoard to make it easier. I executed your code for this. A few more lines are needed to set up Tensorboard logging but the histogram summary of weights can be easily added.
Initialized to zeros
# Baseline: all-zero initial weights. The literal 784 takes the place of the
# `dim` value used in the question's code.
weights_hidden_layer = tf.Variable(initial_value=np.zeros((784,n_hidden_units)), dtype ='float32')
# Record the weight distribution so it can be inspected as a TensorBoard histogram.
tf.summary.histogram("weights_hidden_layer",weights_hidden_layer)
Xavier initialization
# Same variable as the zero-initialized variant, but its starting values come
# from tf.contrib's Xavier initializer instead of np.zeros.
initializer = tf.contrib.layers.xavier_initializer()
weights_hidden_layer = tf.Variable(initializer(shape=(784,n_hidden_units)), dtype ='float32')
# Same histogram tag as the zero-initialized variant, so the two runs can be compared.
tf.summary.histogram("weights_hidden_layer",weights_hidden_layer)

Related

ValueError: No gradients provided for any variable when tensorflow operations added on keras output

I have a pre-trained Keras Sequential model called agent, and I'm trying to fine-tune it with a loss function.
# Read the serialized architecture once. The context manager closes the file
# even if read() raises, which the manual open()/close() pair did not guarantee.
with open('model/prior_model_RMSprop.json', 'r') as json_file:
    json_model = json_file.read()
# Two independent models are built from the same architecture description.
agent = model_from_json(json_model)
prior = model_from_json(json_model)
# load weights into model
agent.load_weights('model/model_RMSprop.h5')
prior.load_weights('model/model_RMSprop.h5')
agent_output = agent.output
prior_output = prior.output
# Mean squared difference between the two models' output tensors.
loss = tf.reduce_mean(tf.square(agent_output - prior_output))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
So far, everything works fine. However, when I add some basic tensorflow operations, the error happens
# Take the arg-max index along axis 2 of each model's output and cast it to float32.
# NOTE(review): argmax yields discrete indices, so presumably no gradient can
# flow back through it to the model weights. That would be consistent with the
# "No gradients provided for any variable" error reported for this snippet - confirm.
agent_logits = tf.cast(tf.argmax(agent_output, axis = 2), dtype = tf.float32)
prior_logits = tf.cast(tf.argmax(prior_output, axis = 2), dtype = tf.float32)
# Mean squared difference between the two index tensors.
loss = tf.reduce_mean(tf.square(agent_logits - prior_logits))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
ValueError: No gradients provided for any variable
So do the tensorflow operations break the connection between the model and the loss function? I've been stuck here for almost 2 weeks, so please help. I'm also not very clear about how to update a Keras model's trainable weights with the loss function I defined. Any hints or related links will be appreciated!

How to restore weights and biases in Tensorflow?

I am using Tensorflow and Python to define and train a NN in the following manner:
# clear old tensor values that cause a problem when rerunning
tf.reset_default_graph()
#network dims
hiddenneurons=12
input_dim=3
# Set up the model
def neural_net_model(X_data,input_dim):
    """Build a one-hidden-layer tanh network and return its output tensor.

    The hidden width is read from the module-level ``hiddenneurons``.

    Args:
        X_data: input tensor; it is matrix-multiplied by a weight matrix of
            shape ``[input_dim, hiddenneurons]``.
        input_dim: number of rows of the first weight matrix ``W_1``.

    Returns:
        The tensor ``matmul(layer_1, W_O) + b_O``. No activation is applied
        to the output layer.
    """
    # Hidden layer: multiply, add bias, then apply the tanh activation.
    W_1 = tf.Variable(tf.random_uniform([input_dim,hiddenneurons]),name='W_1')
    b_1 = tf.Variable(tf.zeros([hiddenneurons]),name='b_1')
    layer_1 = tf.add(tf.matmul(X_data,W_1), b_1)
    layer_1 = tf.nn.tanh(layer_1)
    # Output layer: multiply and add bias only (single output column).
    W_O = tf.Variable(tf.random_uniform([hiddenneurons,1]),name='W_O')
    b_O = tf.Variable(tf.zeros([1]),name='b_O')
    output = tf.add(tf.matmul(layer_1,W_O), b_O)
    return output
xs = tf.placeholder("float")
ys = tf.placeholder("float")
# Use the module-level input_dim rather than repeating the literal 3.
output = neural_net_model(xs,input_dim)
# Mean squared error between the network output and the targets.
cost = tf.reduce_mean(tf.square(output-ys))
train = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
c_t = []     # training cost after each epoch
c_test = []  # test cost after each epoch
with tf.Session() as sess:
    # Initiate session and initialize all variables
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    for i in range(10):
        for j in range(X_train.shape[0]):
            # Run cost and train with each sample (one row at a time)
            sess.run([cost,train],feed_dict={xs:X_train.values[j,:].reshape(1,input_dim), ys:y_train.values[j]})
        # After each pass over the training rows, record the cost on both sets.
        c_t.append(sess.run(cost, feed_dict={xs:X_train,ys:y_train}))
        c_test.append(sess.run(cost, feed_dict={xs:X_test,ys:y_test}))
        print('Epoch :',i,'Cost :',c_t[i])
        plt.scatter(i,c_t[i])
    # predict output of test data after training
    pred = sess.run(output, feed_dict={xs:X_test})
    # NOTE(review): this prints the placeholder object, not the predictions - confirm intent.
    print(xs)
    print('Cost :',sess.run(cost, feed_dict={xs:X_test,ys:y_test}))
    # save model (must happen while the session is still open)
    saver.save(sess,'save folder')
    print('Model saved')
At this point, if I output the trainable variables with tf.trainable_variables(), I get four variables of the expected shape for W_1:0, W_O:0, b_1:0, b_O:0.
I want to be able to have a different file which restores the model and then uses the same weights and biases as saved, so that I can run a variety of testing data through it.
I am having trouble restoring the model and persuading it to reuse the past weights and biases. My restore code looks like this:
# clear old tensor values
tf.reset_default_graph()
newsaver = tf.train.import_meta_graph(modelloc)
def neural_net_model(X_data,input_dim):
    """Build a one-hidden-layer tanh network and return its output tensor.

    The hidden width is read from the module-level ``hiddenneurons``.
    Each call creates four new, unnamed ``tf.Variable`` objects; it does not
    look up variables that already exist in the graph.

    Args:
        X_data: input tensor; it is matrix-multiplied by a weight matrix of
            shape ``[input_dim, hiddenneurons]``.
        input_dim: number of rows of the first weight matrix.

    Returns:
        The tensor ``matmul(layer_1, W_O) + b_O``.
    """
    W_1 = tf.Variable(tf.random_uniform([input_dim,hiddenneurons]))
    b_1 = tf.Variable(tf.zeros([hiddenneurons]))
    layer_1 = tf.add(tf.matmul(X_data,W_1), b_1)
    layer_1 = tf.nn.tanh(layer_1)
    W_O = tf.Variable(tf.random_uniform([hiddenneurons,1]))
    b_O = tf.Variable(tf.zeros([1]))
    output = tf.add(tf.matmul(layer_1,W_O), b_O)
    # Without this return the caller's `output = neural_net_model(...)` is None.
    return output
xs = tf.placeholder("float")
ys = tf.placeholder("float")
output = neural_net_model(xs,3)
with tf.Session() as sessr:
    # Initialize every variable currently in the graph, then load the
    # checkpointed values through the saver created by import_meta_graph.
    sessr.run(tf.global_variables_initializer())
    newsaver.restore(sessr, tf.train.latest_checkpoint('folder where .ckpt files are'))
    # Evaluate the freshly built `output` tensor on the test inputs.
    pred = sessr.run(output, feed_dict={xs:X_test})
At this point, if I type tf.trainable_variables(), I get the details of the four tensors W_1:0, W_O:0, b_O:0, b_1:0, plus four new ones Variable_0, Variable_1:0, Variable_2:0, Variable_3_0. This means that the data is tested on these new variables and does not give the desired result. I don't seem to be able to use the restored weights and biases W_1,W_O, b_1,b_O.
I KNOW that I am reinitialising the variables when I don't need to, and that this is the problem, and I have read this post here in detail. I have also read this, and this, and many others. If I remove the repetition of the model definition, 'output' or 'neural_net_model' becomes undefined and the code doesn't work. If I try to specify W_1 etc. in any other way, the code doesn't work.

Using `softmax_cross_entropy_with_logits()` with `seq2seq.sequence_loss()`

I have a working RNN using the default softmax loss function for tf.contrib.seq2seq.sequence_loss() (which I'm assuming is tf.nn.softmax()) but would instead like to use tf.nn.softmax_cross_entropy_with_logits(). According to the seq2seq.sequence_loss documentation, one may use softmax_loss_function= to override the default loss function:
softmax_loss_function: Function (labels, logits) -> loss-batch to be
used instead of the standard softmax (the default if this is None).
Note that to avoid confusion, it is required for the function to
accept named arguments.
Here is my code that works:
from tensorflow.python.layers.core import Dense
# Build the graph
train_graph = tf.Graph()
# Set the graph to default to ensure that it is ready for training
with train_graph.as_default():
    # Load the model inputs
    (input_data, targets, keep_prob, lr, target_sequence_length,
     max_target_sequence_length, source_sequence_length) = get_model_inputs()

    # Create the training and inference logits
    training_decoder_output, inference_decoder_output = seq2seq_model(
        input_data,
        targets,
        lr,
        target_sequence_length,
        max_target_sequence_length,
        source_sequence_length,
        len(source_letter_to_int),
        len(target_letter_to_int),
        encoding_embedding_size,
        decoding_embedding_size,
        rnn_size,
        num_layers,
        keep_prob)

    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
    inference_logits = tf.identity(inference_decoder_output.sample_id, name='predictions')

    # Create the weights for sequence_loss
    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping: clip every gradient element to [-5, 5]; variables
        # for which no gradient was computed are skipped.
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

    # Add variables to collection in order to load them up when retraining a saved graph
    tf.add_to_collection("cost", cost)
    tf.add_to_collection("train_op", train_op)
My attempt to change the loss function is as follows (I've only indicated the code that is different):
with tf.name_scope("optimization"):
    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, len(target_letter_to_int))
    # NOTE(review): the trailing 30 is hard-coded; presumably the sequence length - confirm.
    y_reshaped = tf.reshape(y_one_hot, [batch_size, len(target_letter_to_int), 30])

    # Loss function
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=training_logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    # NOTE(review): `loss` is a Tensor at this point, whereas the sequence_loss
    # documentation describes softmax_loss_function as a function
    # (labels, logits) -> loss-batch that accepts named arguments.
    cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks, softmax_loss_function=loss)
The line cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks, softmax_loss_function=loss) is now giving me "TypeError: 'Tensor' object is not callable." This is one of the most opaque errors I've seen Tensorflow produce and I haven't found much of anything in the way of explanation on the internet. Any help would be appreciated.

Gradients are always zero

I have written an algorithm using the TensorFlow framework and ran into the problem that tf.train.Optimizer.compute_gradients(loss) returns zero for all weights. Another problem is that if I use a batch size larger than about 5, tf.histogram_summary for the weights throws an error saying that some of the values are NaN.
I cannot provide a reproducible example here, because my code is quite bulky and I am not experienced enough with TF to make it shorter. I will try to paste some fragments here.
Main loop:
images_ph = tf.placeholder(tf.float32, shape=some_shape)
labels_ph = tf.placeholder(tf.float32, shape=some_shape)
output = inference(BATCH_SIZE, images_ph)
# From here on the name `loss` refers to the loss tensor, not the loss() function.
loss = loss(labels_ph, output)
train_op = train(loss, global_step)
session = tf.Session()
session.run(tf.initialize_all_variables())
# range() works on both Python 2 and 3; xrange() exists only on Python 2.
for i in range(MAX_STEPS):
    # Fetch one batch and run a single loss evaluation + training step on it.
    images, labels = train_dataset.get_batch(BATCH_SIZE, yolo.INPUT_SIZE, yolo.OUTPUT_SIZE)
    session.run([loss, train_op], feed_dict={images_ph : images, labels_ph : labels})
Train_op (this is where the problem occurs):
def train(total_loss, global_step=None):
    """Build the training op: Adam gradients with per-variable histogram summaries.

    Args:
        total_loss: scalar loss tensor to differentiate.
        global_step: optional step variable forwarded to ``apply_gradients``.
            It is accepted as a parameter because the caller invokes
            ``train(loss, global_step)``.

    Returns:
        The op returned by ``opt.apply_gradients``.
    """
    opt = tf.train.AdamOptimizer()
    grads = opt.compute_gradients(total_loss)
    # Here gradients are zeros
    for grad, var in grads:
        # Variables for which no gradient was computed come back as None.
        if grad is not None:
            tf.histogram_summary("gradients/" + var.op.name, grad)
    return opt.apply_gradients(grads, global_step=global_step)
Loss (the loss is calculated correctly, since it changes from sample to sample):
def loss(labels, output):
    """Return the mean of the element-wise squared difference of labels and output."""
    return tf.reduce_mean(tf.squared_difference(labels, output))
Inference: a set of convolution layers with ReLU followed by 3 fully connected layers with sigmoid activation in the last layer. All weights initialized by truncated normal rv's. All labels are vectors of fixed length with real numbers in range [0,1].
Thanks in advance for any help! If you have any hypotheses about my problem, please share them and I will try them. I can also share the whole code if you like.

Minimal RNN example in tensorflow

Trying to implement a minimal toy RNN example in tensorflow.
The goal is to learn a mapping from the input data to the target data, similar to this wonderful concise example in theanets.
Update: We're getting there. The only part remaining is to make it converge (and less convoluted). Could someone help to turn the following into running code or provide a simple example?
import tensorflow as tf
from tensorflow.python.ops import rnn_cell
init_scale = 0.1
num_steps = 7
num_units = 7
input_data = [1, 2, 3, 4, 5, 6, 7]
target = [2, 3, 4, 5, 6, 7, 7]
#target = [1,1,1,1,1,1,1] #converges, but not what we want
batch_size = 1
with tf.Graph().as_default(), tf.Session() as session:
    # Placeholder for the inputs and target of the net
    # inputs = tf.placeholder(tf.int32, [batch_size, num_steps])
    input1 = tf.placeholder(tf.float32, [batch_size, 1])
    # The same placeholder object is reused for every unrolled time step.
    inputs = [input1 for _ in range(num_steps)]
    outputs = tf.placeholder(tf.float32, [batch_size, num_steps])
    gru = rnn_cell.GRUCell(num_units)
    initial_state = state = tf.zeros([batch_size, num_units])
    loss = tf.constant(0.0)
    # setup model: unroll
    for time_step in range(num_steps):
        # Share the cell's variables across all steps after the first.
        if time_step > 0:
            tf.get_variable_scope().reuse_variables()
        step_ = inputs[time_step]
        output, state = gru(step_, state)
        loss += tf.reduce_sum(abs(output - target)) # all norms work equally well? NO!
    final_state = state
    optimizer = tf.train.AdamOptimizer(0.1) # CONVERGEs sooo much better
    train = optimizer.minimize(loss) # let the optimizer train
    numpy_state = initial_state.eval()
    session.run(tf.initialize_all_variables())
    for epoch in range(10): # now
        for i in range(7): # feed fake 2D matrix of 1 byte at a time ;)
            # Carry the state returned by the previous run into the next one.
            feed_dict = {initial_state: numpy_state, input1: [[input_data[i]]]} # no
            numpy_state, current_loss,_ = session.run([final_state, loss,train], feed_dict=feed_dict)
        print(current_loss) # hopefully going down, always stuck at 189, why!?
I think there are a few problems with your code, but the idea is right.
The main issue is that you're using a single tensor for inputs and outputs, as in:
inputs = tf.placeholder(tf.int32, [batch_size, num_steps]).
In TensorFlow the RNN functions take a list of tensors (because num_steps can vary in some models). So you should construct inputs like this:
inputs = [tf.placeholder(tf.int32, [batch_size, 1]) for _ in xrange(num_steps)]
Then you need to take care of the fact that your inputs are int32s, but an RNN cell works on float vectors - that's what embedding_lookup is for.
And finally you'll need to adapt your feed to put in the input list.
I think the ptb tutorial is a reasonable place to look, but if you want an even more minimal example of an out-of-the-box RNN you can take a look at some of the rnn unit tests, e.g., here.
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/kernel_tests/rnn_test.py#L164