On TensorFlow 1.4 I'm getting the "You must feed a value for placeholder tensor..." error. The thing is, I am feeding this tensor, like this:
feats = np.reshape(feats, (-1, var1, feat_dim, 1))
_, outlogits = sess.run([train_step, logits], feed_dict={inp_layer: feats,
targs: targets,
eta: 1e-4})
(normally I'd want to reshape inside of the graph but for debugging purposes I've taken that out)
placeholder:
inp_layer = tf.placeholder(tf.float32, shape=[None, var1, feat_dim, 1])
The error says: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,66,200,1]
And this error occurs when I run sess.run(tf.global_variables_initializer()), so it hasn't even gotten to the part where it should start thinking about placeholders and yet it's complaining about them?!
I think it might have to do with the fact that one of my layer sizes depends on a placeholder (I have validate_shape=False for the weights, though). I'll add more code.
Edit: example code that fails is below; I point out where I think the problem is coming from (remember, the code fails on global variable init).
Edit 2: yup, the problem is that line. The question then becomes: how can I have a graph where the dimensions of one of the weights (and therefore of the outputs) are dynamic?
train_feats = '..'
train_json = '..'
feat_dim = 200
var1 = 20
batch_size = 64
inp_layer = tf.placeholder(tf.float32, shape=[None, var1, feat_dim, 1])
targs = tf.placeholder(tf.int64, shape=[None])
eta = tf.placeholder(tf.float32)
chunk_size = 3
w1 = init_weight([chunk_size, feat_dim, 1, 32])
b1 = tf.zeros([32])
a1 = conv_layer(inp_layer, w1, b1, stride=3, padding='VALID')
chunk_size = tf.shape(a1)[1]  # <==== THIS IS THE PROBLEM!
w5 = init_weight([chunk_size, 1, 32, 12])
b5 = tf.zeros([12])
a5 = conv_layer(a1, w5, b5, stride=1, padding='VALID', act=False)
logits_ = tf.reshape(a5, [-1, 12])
softmax = tf.nn.softmax(logits_)
cross_ent = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targs,
logits=logits_))
train_step = tf.train.AdamOptimizer(eta).minimize(cross_ent)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for feats, targets in batch_gen(train_feats, train_json, var1, feat_dim):
        feats = np.reshape(feats, (-1, var1, feat_dim, 1))
        sess.run(train_step, feed_dict={inp_layer: feats,
                                        targs: targets,
                                        eta: 1e-4})
def init_weight(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01), validate_shape=False)

def conv_layer(x, w, b, stride, padding, act=True):
    # striding over the features
    if act:
        return tf.nn.elu(tf.nn.conv2d(x, w, [1, stride, 1, 1], padding) + b)
    else:
        return tf.nn.conv2d(x, w, [1, stride, 1, 1], padding) + b
The line
chunk_size = tf.shape(a1)[1]
tf.shape extracts the runtime shape of a1, not the static shape known at graph definition time. Since a1 is the result of the convolution between inp_layer and w1, evaluating tf.shape(a1) also requires resolving inp_layer. Because inp_layer is a placeholder that is not fed when you run the variable initializer, you get the error.
Since the second dimension of a1 that you're interested in is known at graph definition time, you can just use:
chunk_size = a1.shape[1].value
to extract the correct dimension value.
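To make the distinction concrete, here is a small standalone sketch (not the asker's graph; the shapes are chosen to match the error message):
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 66, 200, 1])

# Static shape: known at graph-construction time; no session or feed needed.
static_dim = x.shape[1].value      # a plain Python int: 66

# Dynamic shape: an op in the graph; evaluating it requires feeding x.
dynamic_dim = tf.shape(x)[1]       # a scalar int32 Tensor

# Building a variable from the static value works at init time, because
# tf.truncated_normal gets a concrete Python shape instead of a Tensor that
# depends on an unfed placeholder.
w = tf.Variable(tf.truncated_normal([static_dim, 1, 32, 12], stddev=0.01))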
I have the following problem with MultiRNNCell, but first things first.
My data looks like the following:
[[a1, a2, ..., a200], [b1, b2, ..., b200], ...]
The relevant code is here:
rows, row_size = 20, 10
num_classes = 3
batch_size = 128
hidden_layer_size = 256
n_layers = 4
tf_x = tf.placeholder(tf.float32, [None, rows, row_size])
tf_y = tf.placeholder(tf.float32, [None, num_classes])
in_x = tf.unstack(tf_x, axis=1)
network = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, state_is_tuple=True)
for _ in range(n_layers)], state_is_tuple=True)
outputs, states = rnn.dynamic_rnn(cell=network, inputs=in_x, dtype=tf.float32)
outputs = tf.matmul(outputs[-1], layer["weights"]) + layer["biases"]
...
...
x_feed = np.array(x_feed.reshape((batch_size, rows, row_size)))
_, c = sess.run([optimizer, loss_fn], feed_dict={tf_x: x_feed, tf_y: y_feed})
I am getting the error ValueError: Shape (10, ?) must have rank at least 3,
and the traceback points to this line:
outputs, states = rnn.dynamic_rnn(cell=network, inputs=in_x, dtype=tf.float32)
When I use static_rnn instead of dynamic_rnn,
outputs, states = rnn.static_rnn(cell=network, inputs=in_x, dtype=tf.float32)
everything runs fine, but I don't know what I am doing wrong. How do I use dynamic_rnn in this case?
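For reference, a minimal sketch of the difference (assuming TensorFlow 1.x and the shapes above): static_rnn expects a Python list of 2-D tensors, which is what tf.unstack produces, while dynamic_rnn expects the whole 3-D tensor [batch, time, depth] directly, so the unstack step is only needed for the static version.
# Option A: static_rnn takes a length-`rows` list of [batch, row_size] tensors.
# in_x = tf.unstack(tf_x, axis=1)
# outputs, states = tf.nn.static_rnn(cell=network, inputs=in_x, dtype=tf.float32)

# Option B: dynamic_rnn takes the 3-D placeholder directly -- no unstack needed.
outputs, states = tf.nn.dynamic_rnn(cell=network, inputs=tf_x, dtype=tf.float32)
# outputs has shape [batch, rows, hidden_layer_size]; last time step:
last_output = outputs[:, -1, :]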
I was trying to implement various GANs in Tensorflow (after doing it successfully in PyTorch), and I am having some problems while coding the discriminator part.
The code of the discriminator (very similar to the MNIST CNN tutorial) is:
def discriminator(x):
    """Compute discriminator score for a batch of input images.

    Inputs:
    - x: TensorFlow Tensor of flattened input images, shape [batch_size, 784]

    Returns:
    TensorFlow Tensor with shape [batch_size, 1], containing the score
    for an image being real for each input image.
    """
    with tf.variable_scope("discriminator"):
        x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
        h_1 = leaky_relu(tf.layers.conv2d(x, 32, 5))
        m_1 = tf.layers.max_pooling2d(h_1, 2, 2)
        h_2 = leaky_relu(tf.layers.conv2d(m_1, 64, 5))
        m_2 = tf.layers.max_pooling2d(h_2, 2, 2)
        m_2 = tf.contrib.layers.flatten(m_2)
        h_3 = leaky_relu(tf.layers.dense(m_2, 4*4*64))
        logits = tf.layers.dense(h_3, 1)
        return logits
while the code for the generator (architecture of InfoGAN paper) is:
def generator(z):
    """Generate images from a random noise vector.

    Inputs:
    - z: TensorFlow Tensor of random noise with shape [batch_size, noise_dim]

    Returns:
    TensorFlow Tensor of generated images, with shape [batch_size, 784].
    """
    with tf.variable_scope("generator"):
        batch_size = tf.shape(z)[0]
        fc = tf.nn.relu(tf.layers.dense(z, 1024))
        bn_1 = tf.layers.batch_normalization(fc)
        fc_2 = tf.nn.relu(tf.layers.dense(bn_1, 7*7*128))
        bn_2 = tf.layers.batch_normalization(fc_2)
        bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
        c_1 = tf.nn.relu(tf.contrib.layers.convolution2d_transpose(bn_2, 64, 4, 2, padding='valid'))
        bn_3 = tf.layers.batch_normalization(c_1)
        c_2 = tf.tanh(tf.contrib.layers.convolution2d_transpose(bn_3, 1, 4, 2, padding='valid'))
So far, so good. The number of parameters is correct (checked it). However, I am having some problems in the next block of code:
tf.reset_default_graph()
# number of images for each batch
batch_size = 128
# our noise dimension
noise_dim = 96
# placeholder for images from the training dataset
x = tf.placeholder(tf.float32, [None, 784])
# random noise fed into our generator
z = sample_noise(batch_size, noise_dim)
# generated images
G_sample = generator(z)
with tf.variable_scope("") as scope:
    # scale images to be -1 to 1
    logits_real = discriminator(preprocess_img(x))
    # Re-use discriminator weights on new inputs
    scope.reuse_variables()
    logits_fake = discriminator(G_sample)
# Get the list of variables for the discriminator and generator
D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
# get our solver
D_solver, G_solver = get_solvers()
# get our loss
D_loss, G_loss = gan_loss(logits_real, logits_fake)
# setup training steps
D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
G_train_step = G_solver.minimize(G_loss, var_list=G_vars)
D_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'discriminator')
G_extra_step = tf.get_collection(tf.GraphKeys.UPDATE_OPS, 'generator')
The problem occurs at the reshape in the discriminator, and the error says:
ValueError: None values not supported.
Sure, the value of batch_size is None (by the way, I get the same error even when I change it to some fixed number), but tf.shape (as far as I understand) should return the dynamic shape, not the static one. I think I am a bit lost here.
For what it's worth, here is the link to the entire notebook I am working on: https://github.com/TheRevanchist/GANs/blob/master/GANs-TensorFlow.ipynb, if someone wants to look at it.
NB: The code here is part of the Stanford CS231n assignment. I have no affiliation with Stanford though, so it isn't homework cheating (proof: the course finished months ago).
The generator seems to be the problem: its output size should match what the discriminator expects. The other issue is that batch norm should be applied before the activation unit. I have modified the code:
with tf.variable_scope("generator"):
    fc = tf.layers.dense(z, 4*4*128)
    bn_1 = leaky_relu(tf.layers.batch_normalization(fc))
    bn_1 = tf.reshape(bn_1, [-1, 4, 4, 128])
    c_1 = tf.layers.conv2d_transpose(bn_1, 64, 5, strides=2, padding='same')
    bn_2 = leaky_relu(tf.layers.batch_normalization(c_1))
    c_2 = tf.layers.conv2d_transpose(bn_2, 32, 5, strides=2, padding='same')
    bn_3 = leaky_relu(tf.layers.batch_normalization(c_2))
    c_3 = tf.layers.conv2d_transpose(bn_3, 1, 5, strides=2, padding='same')
    c_3 = tf.layers.batch_normalization(c_3)
    c_3 = tf.image.resize_images(c_3, (28, 28))
    c_3 = tf.contrib.layers.flatten(c_3)
    c_3 = tf.tanh(c_3)
    return c_3
Your code runs and produces output when run with the above changes.
Instead of passing None to reshape you must pass -1.
So this:
x = tf.reshape(x, [tf.shape(x)[0], 28, 28, 1])
becomes
x = tf.reshape(x, [-1, 28, 28, 1])
and this:
bn_2 = tf.reshape(bn_2, [batch_size, 7, 7, 128])
becomes:
bn_2 = tf.reshape(bn_2, [-1, 7, 7, 128])
It will infer the batch size from the rest of the shape you provided.
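As a quick sanity check, here is a standalone snippet (not part of the notebook) showing that the -1 dimension picks up whatever batch size is fed at run time:
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])
x_img = tf.reshape(x, [-1, 28, 28, 1])   # -1: infer the batch dimension

with tf.Session() as sess:
    batch = np.zeros((128, 784), dtype=np.float32)
    print(sess.run(tf.shape(x_img), feed_dict={x: batch}))   # [128  28  28   1]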
I'm having a rough time trying to figure out what's wrong with my LSTM model. I have 11 inputs and 2 output classes (one-hot encoded), and very quickly, within about 1 batch, the error just settles at the proportion of one of the output classes and stays there (i.e., the model always predicts the same class).
I tried printing the weights and biases, but they all seem to be full of NaNs.
If I decrease the learning rate or mess around with the layers/units, I can make it approach that one-class error rate more slowly, but it always seems to end up there.
Here's the code:
num_units = 30
num_layers = 50
dropout_rate = 0.80
learning_rate=0.0001
batch_size = 180
epoch = 1
input_classes = len(train_input[0])
output_classes = len(train_output[0])
data = tf.placeholder(tf.float32, [None, input_classes, 1]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, output_classes]) #one-hot encoded: [1,0] = bad, [0,1] = good
dropout = tf.placeholder(tf.float32)
cell = tf.contrib.rnn.LSTMCell(num_units, state_is_tuple=True)
cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout)
cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
#Input shape [batch_size, max_time, depth], output shape: [batch_size, max_time, cell.output_size]
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2]) #reshapes it to [sequence_size, batch_size, depth]
#get last entry as it includes previous results
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.get_variable("W", shape=[num_units, output_classes], initializer=tf.contrib.layers.xavier_initializer())
bias = tf.get_variable("B", shape=[output_classes], initializer=tf.contrib.layers.xavier_initializer())
logits = tf.matmul(last, weight) + bias
prediction = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target)
prediction = tf.clip_by_value(prediction, 1e-10,100.0)
cost = tf.reduce_mean(prediction)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
minimize = optimizer.minimize(cost)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(logits, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.global_variables_initializer()
saver = tf.train.Saver()
sess = tf.Session()
sess.run(init_op)
no_of_batches = int((len(train_input)) / batch_size)
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out, dropout: dropout_rate})
sess.close()
Your labels are one-hot encoded, but tf.nn.sparse_softmax_cross_entropy_with_logits expects integer class indices rather than one-hot vectors. If you want to use it in place of tf.nn.softmax_cross_entropy_with_logits, convert the targets first (for example with tf.argmax(target, 1)).
Refer to this Stack Overflow answer to understand the difference between the two functions.
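A minimal sketch of that substitution, reusing the names from the question (target is the one-hot placeholder, logits the final linear layer):
# Convert one-hot targets to integer class indices for the sparse variant:
class_ids = tf.argmax(target, axis=1)                   # shape [batch_size]
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=class_ids,
                                                      logits=logits)
cost = tf.reduce_mean(xent)

# Equivalent loss keeping the one-hot targets (what the question already uses):
# xent = tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=logits)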
I am trying to use the softmax regression method discussed in https://www.tensorflow.org/get_started/mnist/beginners to recognize characters.
My code is as follows.
train_data = pd.read_csv('CharDataSet/train.csv')
print(train_data.shape)
x = tf.placeholder(tf.float32, [None, 130])
W = tf.Variable(tf.zeros([130, 26]))
b = tf.Variable(tf.zeros([26]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 26])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(10):
    batch_xs = train_data.iloc[:, 2:]
    print(batch_xs)
    batch_ys = getencodedbatch(train_data.iloc[:, 1])
    print(batch_ys)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
However, I am getting an accuracy of 1, which shouldn't be the case.
The reason is that my y tensor ends up as an array like
[nan, ..., nan]
Can anyone explain to me what is wrong in my code?
I converted each character to a one-hot encoding using the method below
def getencodedbatch(param):
    s = (param.shape[0], 26)
    y_encode = np.zeros(s)
    row = 0
    # print(y_encode)
    for val in param:
        col = ord(val) - 97
        y_encode[row, col] = 1
        row += 1
    return pd.DataFrame(y_encode)
Here is the problem you are having:
- You set your initial weights and biases to 0. This is wrong, as your network does not learn.
- The result is that y ends up consisting of zeros.
- You take the log of y, and the log of 0 is not defined. Hence the NaN.
Good luck!
Edit, to tell you how to fix it: look at an example of classifying MNIST characters and see what they do. You probably want to initialise your weights to be random normals ;)
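A minimal sketch of that kind of fix, reusing the question's x and y_ placeholders (the stddev value here is just a common starting point, not a prescribed one): draw the weights from a truncated normal and let TensorFlow fuse softmax and log internally, so a zero probability never reaches tf.log.
W = tf.Variable(tf.truncated_normal([130, 26], stddev=0.1))
b = tf.Variable(tf.zeros([26]))
logits = tf.matmul(x, W) + b

# Numerically stable: softmax and log are combined inside this single op.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)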
I'm sure I'm missing something obvious. Here's the tail end of my code:
# simple loss function
loss = tf.reduce_sum(tf.abs(tf.sub(x4, yn)))
train_step = tf.train.GradientDescentOptimizer(0.000001).minimize(loss)
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    print(sess.run([tf.reduce_sum(w1), tf.reduce_sum(b1)]))
    for i in range(5):
        # fill in x1 and yn
        sess.run(train_step, feed_dict={x1: in_images, yn: out_images})
        print(sess.run([tf.reduce_sum(w1), tf.reduce_sum(b1)]))
The network that feeds the loss function is a simple CNN, built from conv2d, bias_add, and elu ops. I wanted to take a look at how the weights and biases of the first layer change. The first print returns the expected values ([+/- 100 or so, 0]), as w1 was initialized with a random normal and b1 was initialized with zeros.
The second print statement gives a different value pair, as expected.
What's not expected is that each time through the loop, the second print statement prints the same value pair, as though each invocation of train_step is doing the same thing each time, rather than updating the values of the Variables in the loss network.
What am I missing here?
Here's a cut and paste of the interesting part of the run:
I tensorflow/core/common_runtime/gpu/gpu_device.cc:806] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 970, pci bus id: 0000:01:00.0)
[-50.281082, 0.0]
W tensorflow/core/common_runtime/bfc_allocator.cc:213] Ran out of memory trying to allocate 3.98GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory is available.
[112.52832, 0.078026593]
[112.52832, 0.078026593]
[112.52832, 0.078026593]
[112.52832, 0.078026593]
[112.52832, 0.078026593]
I can post the network itself if necessary, but I suspect the problem is my mental model of how tensorflow updates state.
Here's the entire python program, with a dummy routine for the image input to show the issue:
import tensorflow as tf
import numpy as np
from scipy import misc
H = 128
W = 128
x1 = tf.placeholder(tf.float32, [None, H, W, 1], "input_image")
yn = tf.placeholder(tf.float32, [None, H-12, W-12, 1], "test_image")
w1 = tf.Variable(tf.random_normal([7, 7, 1, 64])) # 7x7, 1 input chan, 64 output chans
b1 = tf.Variable(tf.constant(0.1, shape=[64]))
x2 = tf.nn.conv2d(x1, w1, [1,1,1,1], "VALID")
x2 = tf.nn.bias_add(x2, b1)
x2 = tf.nn.elu(x2)
w2 = tf.Variable(tf.random_normal([5, 5, 64, 32])) # 5x5, 64 input 32 output chans
b2 = tf.Variable(tf.constant(0.1, shape=[32]))
x3 = tf.nn.conv2d(x2, w2, [1,1,1,1], "VALID")
x3 = tf.nn.bias_add(x3, b2)
x3 = tf.nn.elu(x3)
w3 = tf.Variable(tf.random_normal([3, 3, 32, 1]))
b3 = tf.Variable(tf.constant(0.1, shape=[1]))
x4 = tf.nn.conv2d(x3, w3, [1,1,1,1], "VALID")
x4 = tf.nn.bias_add(x4, b3)
x4 = tf.nn.elu(x4)
loss = tf.reduce_sum(tf.abs(tf.sub(x4, yn)))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
# fake for testing
in_images = np.random.rand(20, 128, 128, 1)
out_images = np.random.rand(20, 116, 116, 1)
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    print(sess.run([tf.reduce_mean(w1), tf.reduce_mean(b1)]))
    for i in range(5):
        # fill in x1 and yn
        sess.run(train_step, feed_dict={x1: in_images, yn: out_images})
        print(sess.run([tf.reduce_mean(w1), tf.reduce_mean(b1)]))
I've looked at a bunch of other training examples and I'm still not seeing what I am doing wrong. Changing the learning rate just changes the numbers printed, but the behavior remains the same: no apparent change from running the optimizer.
The error was in the way I computed my loss function. I just added up all of the errors across the batch, rather than taking the mean error for each pair of images. The following loss function
# simple loss function
diff_image = tf.abs(tf.sub(x4,yn))
# sum over all dimensions except batch dim
err_sum = tf.reduce_sum(diff_image, [1,2,3])
#take mean over batch
loss = tf.reduce_mean(err_sum)
actually starts converging with the AdamOptimizer. The GradientDescentOptimizer still exhibits the "change once only" behavior; I'll treat it as a bug and post it on GitHub.
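For completeness, a sketch of the corrected loss wired to Adam (the 1e-4 learning rate here is an arbitrary choice, not taken from the original post):
diff_image = tf.abs(tf.sub(x4, yn))             # per-pixel absolute error
err_sum = tf.reduce_sum(diff_image, [1, 2, 3])  # sum over H, W, channels
loss = tf.reduce_mean(err_sum)                  # mean over the batch
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)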