How do I add dropout to my TensorFlow neural network with RNN cells? - tensorflow

I have a neural network in TensorFlow:
n_steps = 10
n_inputs = 3
n_outputs = 1
n_neurons = 100
n_layers = 3
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
layers = []
for i in range(n_layers):
    layers.append(tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu))
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
Is the approach below correct? It runs, but I'm not sure ;)
training = tf.placeholder_with_default(True,shape=())
X_dropout = tf.layers.dropout(X, dropout_rate, training=training)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X_dropout, dtype=tf.float32)
How do I properly add dropout to this network?
Thanks for any suggestions!

Your code only applies dropout to the input X; to apply dropout inside the RNN you should use tf.contrib.rnn.DropoutWrapper (link).
layers = []
for i in range(n_layers):
    layers.append(
        tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),
            output_keep_prob=1 - dropout_rate))
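Note that a plain float output_keep_prob is baked in when the graph is built. A common pattern (a minimal sketch, assuming TF 1.x; the keep_prob placeholder is my addition, not part of the original code) is to feed the keep probability through a placeholder so dropout can be switched off at inference time:
keep_prob = tf.placeholder_with_default(1.0, shape=())  # default 1.0 = no dropout (inference)
layers = []
for i in range(n_layers):
    cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
    layers.append(tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob))
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
# During training, feed keep_prob explicitly, e.g.:
# sess.run(train_op, feed_dict={X: x_batch, y: y_batch, keep_prob: 1 - dropout_rate})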

Related

Combine tf.keras.layers with Tensorflow low level API

Can I combine tf.keras.layers with low-level TensorFlow?
The code below is not correct, but I want to do something like this: create placeholders that will later be fed with data (in a tf.Session()) and feed that data to my model:
X, Y = create_placeholders(n_x, n_y)
output = create_model('channels_last')(X)
cost = compute_cost(output, Y)
Yes, it is the same as using tf.layers.dense(). Using tf.keras.layers.Dense() is actually the preferred way in the newest TensorFlow version, 1.13 (tf.layers.dense() is deprecated). For example:
import tensorflow as tf
import numpy as np
x_train = np.array([[-1.551, -1.469], [1.022, 1.664]], dtype=np.float32)
y_train = np.array([1, 0], dtype=int)
x = tf.placeholder(tf.float32, shape=[None, 2])
y = tf.placeholder(tf.int32, shape=[None])
with tf.name_scope('network'):
    layer1 = tf.keras.layers.Dense(2, input_shape=(2,))
    layer2 = tf.keras.layers.Dense(2, input_shape=(2,))
    fc1 = layer1(x)
    logits = layer2(fc1)
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss_fn = tf.reduce_mean(xentropy)
with tf.name_scope('optimizer'):
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = optimizer.minimize(loss_fn)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_val = sess.run(loss_fn, feed_dict={x: x_train, y: y_train})
    _ = sess.run(train_op, feed_dict={x: x_train, y: y_train})
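To also pull predictions out of the same graph (a hypothetical extension of the snippet above, not part of the original answer), define a softmax op once and evaluate it inside the same session:
probs = tf.nn.softmax(logits)  # define alongside the graph, before the session
...
    # inside the tf.Session() block, after the training step:
    print(sess.run(probs, feed_dict={x: x_train}))  # class probabilities per example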

Implementing convolutional layers using Tensorflow

I am trying to implement convolutional layers for text classification from this blog post with some modification to suit my needs.
In the blog, there is only one convolution layer while I'd like mine to have two convolutional layers followed by ReLU and max-pooling.
The code so far is:
vocab_size = 2000
embedding_size = 100
filter_height = 5
filter_width = embedding_size
no_of_channels = 1
no_of_filters = 256
sequence_length = 50
filter_size = 3
no_of_classes = 26
input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.float32, [None, no_of_classes], name="input_y")
# Defining the embedding layer:
with tf.device('/cpu:0'), tf.name_scope("embedding"):
    W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W")
    embedded_chars = tf.nn.embedding_lookup(W, input_x)
    embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
# Convolution block:
with tf.name_scope("convolution-block"):
    filter_shape = [filter_height, embedding_size, no_of_channels, no_of_filters]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
    conv1 = tf.nn.conv2d(embedded_chars_expanded,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="VALID",
                         name="conv1")
    conv2 = tf.nn.conv2d(conv1,
                         W,
                         strides=[1, 1, 1, 1],
                         padding="VALID",
                         name="conv2")
Here, W is the filter matrix.
However, this gives the error:
ValueError: Dimensions must be equal, but are 256 and 1 for 'convolution-block_16/conv2' (op: 'Conv2D') with input shapes: [?,46,1,256], [5,100,1,256].
I realise I have erred in the dimensions of the layer, but I am unable to fix it or put in the correct dimensions.
If anybody could provide any guidance/help, it'd be really helpful.
Thank you.
I can't quite tell what your code is meant to do, but the following change will fix the error:
with tf.name_scope("convolution-block"):
filter_shape = [filter_height, embedding_size, no_of_channels, no_of_channels #change the output channel as input#]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[no_of_filters]), name="b")
conv1 = tf.nn.conv2d(embedded_chars_expanded,
W,
strides = [1,1,1,1],
padding = "SAME", ##Change the padding scheme
name = "conv1")
conv2 = tf.nn.conv2d(conv1,
W,
strides = [1,1,1,1],
padding = "VALID",
name = "conv2")

Outputting sequence in TensorFlow RNN

I created a simple TensorFlow program that tries to predict the next character using the previous 3 characters in a body of text.
A single input could look like this:
np.array(['t','h','i'])
with the corresponding target being
np.array(['s'])
I'm trying to expand this to output, say, the next 4 characters rather than just the next character. To do this I tried feeding a longer array to y:
np.array(['s','','i'])
in addition to changing the placeholder y to:
y = tf.placeholder(dtype=tf.int32, shape=[None, n_steps])
however, this yields the error:
Rank mismatch: Rank of labels (received 2) should equal rank of logits
minus 1 (received 2).
Here's the full code:
embedding_size=40
n_neurons = 200
n_output = vocab_size
learning_rate = 0.001
with tf.Graph().as_default():
    x = tf.placeholder(dtype=tf.int32, shape=[None, n_steps])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    seq_length = tf.placeholder(tf.int32, [None])
    # Let's set up the embedding converting words to vectors
    embeddings = tf.Variable(tf.random_uniform(shape=[vocab_size, embedding_size], minval=-1, maxval=1))
    train_input = tf.nn.embedding_lookup(embeddings, x)
    basic_cell = tf.nn.rnn_cell.GRUCell(num_units=n_neurons)
    outputs, states = tf.nn.dynamic_rnn(basic_cell, train_input, sequence_length=seq_length, dtype=tf.float32)
    logits = tf.layers.dense(states, units=vocab_size, activation=None)
    predictions = tf.nn.softmax(logits)
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for r in range(1000):
            x_batch, y_batch, seq_length_batch = input_fn()
            feed_dict = {x: x_batch, y: y_batch, seq_length: seq_length_batch}
            _, loss_out = sess.run([training_op, loss], feed_dict=feed_dict)
            if r % 1000 == 0:
                print("loss_out", loss_out)
        sample_text = "for th"
        sample_text_ids = np.expand_dims(np.array([w_to_id[c] for c in sample_text] + [0, 0], dtype=np.int32), 0)
        prediction_out = sess.run(predictions, feed_dict={x: sample_text_ids, seq_length: np.array([len(sample_text)])})
        print("Result:", id_to_w[np.argmax(prediction_out)])
For a many-to-many RNN, you should use tf.contrib.seq2seq.sequence_loss to compute the per-time-step loss. Your code should look like this:
...
logits = tf.layers.dense(outputs, units=vocab_size, activation=None)  # per-step outputs, not the final state
weights = tf.sequence_mask(seq_length, n_steps, dtype=tf.float32)  # float mask over the valid time steps
xentropy = tf.contrib.seq2seq.sequence_loss(logits, y, weights)
...
See here for more details on tf.contrib.seq2seq.sequence_loss.
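With per-time-step logits in place, prediction also becomes per step (a sketch of my own, reusing the question's id_to_w mapping; not part of the original answer):
predictions = tf.nn.softmax(logits)  # shape [batch_size, n_steps, vocab_size]
...
prediction_out = sess.run(predictions, feed_dict={x: sample_text_ids, seq_length: np.array([len(sample_text)])})
pred_ids = np.argmax(prediction_out, axis=-1)[0]  # one character id per time step
print("Result:", "".join(id_to_w[i] for i in pred_ids))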

Getting high training accuracy but wrong predictions for training set

I'm trying to get predictions for the training set but I always get the same result for every image. Could someone help me please?
This is how I defined the network:
x = tf.placeholder(tf.float32, shape=[None, image_size_h, image_size_v, num_channels])
y_ = tf.placeholder(tf.float32, shape=[None, num_classes])
keep_prob = tf.placeholder(tf.float32)
def conv_net(x1, weights, biases):
    # First Convolution Layer
    conv1 = conv2d_pool(x1, weights['wc1'], biases['bc1'], 'conv1')
    #print(conv1.get_shape())
    # Second Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'], 'conv2')
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'], 'conv3')
    conv4 = conv2d(conv3, weights['wc4'], biases['bc4'], 'conv4')
    conv5 = conv2d(conv4, weights['wc5'], biases['bc5'], 'conv5')
    conv6 = conv2d(conv5, weights['wc6'], biases['bc6'], 'conv6')
    # Fully connected layer
    fc1 = fullycon(conv6, weights['wd1'], biases['bd1'], 'fc1')
    # Dropout
    fc1_drop = tf.nn.dropout(fc1, keep_prob)
    fc2 = fullytanh(fc1_drop, weights['wd2'], biases['bd2'], 'fc2')
    out = tf.add(tf.matmul(fc2, weights['out']), biases['bout'])
    return out
pred = conv_net(x, weights, biases)
prediyo = tf.argmax(tf.nn.softmax(pred), axis=1)
# Loss and optimizer
correct_prediction = tf.equal(tf.argmax(y_,axis=1), tf.argmax(pred,axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
While I get the prediction by doing:
result_boxes_n = normalise_images(result_boxes)
for n in range(0, result_boxes.shape[0]):
    predicted = prediyo.eval(feed_dict={x: np.reshape(result_boxes_n[n, :, :, :], (1, 9, 8, 3)), keep_prob: 1})
    print(predicted)
predicted always outputs the same class, even though training accuracy is above 90%. However, if I do:
prediyo.eval(feed_dict={x:result_boxes_n, keep_prob: 1})
it works, but not when I try to make individual predictions.
Thanks in advance.
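One way to narrow this down (a debugging sketch of my own, reusing the post's variables; not from the original question) is to compare the batched result against a single-image slice, which keeps the batch dimension without reshaping:
batch_preds = prediyo.eval(feed_dict={x: result_boxes_n, keep_prob: 1})
single_pred = prediyo.eval(feed_dict={x: result_boxes_n[n:n + 1], keep_prob: 1})
print(batch_preds[n], single_pred[0])  # if these disagree, the per-image reshape/preprocessing is the likely culprit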

Tensorflow: variable batch_size gives error when trying to predict with eval (Dimensions of inputs should match)

I am training a model with a variable batch_size (the first batches are 200), so I used None for the batch dimension to make it variable (I couldn't do that for init_state because it gave an error).
x = tf.placeholder(tf.int32, [None, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [None, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
rnn_inputs = tf.one_hot(x, num_classes)
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = tf.reshape(
    tf.matmul(tf.reshape(rnn_outputs, [-1, state_size]), W) + b,
    [batch_size, num_steps, num_classes])
predictions = tf.nn.softmax(logits)
Training the model goes well.
Then I try to predict probabilities with an x of shape (1, 10) instead of (200, 10):
I tried:
test = np.array([[1, 2 , 3 , 4, 5, 6 ,7, 8, 9 ,10]], dtype=np.int32)
print(predictions.eval(feed_dict={x: test}))
I also tried it in a slightly different way with:
preds, state = sess.run([g['preds'],g['final_state']], feed_dict)
Same error:
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [1,1058] vs. shape[1] = [200,4]
[[Node: rnn/while/basic_rnn_cell/basic_rnn_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](rnn/while/TensorArrayReadV3, rnn/while/Identity_2, rnn/while/basic_rnn_cell/basic_rnn_cell/concat/axis)]]
So 1058 is my num_classes, 200 is the (initial) batch_size and 4 is the width of the tensor.
I think I am not using the variable batch_size correctly. Any ideas on what to change?
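The usual fix (a sketch of my own under the assumptions of the question's code; not a posted answer) is to never bake a fixed batch_size into the graph and instead derive it from the input tensor at run time:
batch_size_t = tf.shape(x)[0]  # dynamic batch size, known only when data is fed
init_state = tf.zeros([batch_size_t, state_size])
...
logits = tf.reshape(
    tf.matmul(tf.reshape(rnn_outputs, [-1, state_size]), W) + b,
    [-1, num_steps, num_classes])  # -1 lets TF infer the batch dimension
With that change, the same graph accepts feeds of shape (200, 10) during training and (1, 10) at prediction time.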