Strange error when taking gradient of TensorArray - tensorflow

I'm using the code pasted below. The 'forward' part of the code seems to work by virtue of the "assert root_emb == 1 + emb[0] * emb[1]" passing. However, once a training step is taken (the line following the assert), a strange error appears suggesting an issue with the TensorArray written to during the wihle loop.
tensorflow.python.framework.errors.InvalidArgumentError: TensorArray
TensorArray#gradients: Could not read from TensorArray index 2 because
it has not yet been written to. [[Node:
gradients/while/TensorArrayWrite_grad/TensorArrayRead =
TensorArrayRead[_class=["loc:#TensorArray"], dtype=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/cpu:0"](gradients/while/TensorArrayWrite_grad/TensorArrayGrad/TensorArrayGrad,
gradients/while/TensorArrayWrite_grad/TensorArrayRead/StackPop,
gradients/while/TensorArrayWrite_grad/TensorArrayGrad/gradient_flow)]]
Caused by op u'gradients/while/TensorArrayWrite_grad/TensorArrayRead',
defined at: File "minimal.py", line 82, in
model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 61, in init
self.grad = tf.gradients(self.loss, self.params) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gradients.py",
line 481, in gradients
in_grads = _AsList(grad_fn(op, *out_grads)) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_grad.py",
line 115, in _TensorArrayWriteGrad
grad = g.read(index) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py",
line 177, in read
dtype=self._dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py",
line 781, in _tensor_array_read
flow_in=flow_in, dtype=dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py",
line 694, in apply_op
op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py",
line 2154, in create_op
original_op=self._default_original_op, op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py",
line 1154, in init
self._traceback = _extract_stack()
...which was originally created as op u'while/TensorArrayWrite',
defined at: File "minimal.py", line 82, in
model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 50, in init
loop_vars=(self.time, node_emb, tf.zeros([1]))) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1681, in While
back_prop=back_prop, swap_memory=swap_memory, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1671, in while_loop
result = context.BuildLoop(cond, body, loop_vars) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1572, in BuildLoop
body_result = body(*vars_for_body_with_tensor_arrays) File "minimal.py", line 43, in _recurrence
new_node_emb = node_emb.write(children_and_parent[-1], parent_emb) File
"/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py",
line 200, in write
name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py",
line 875, in _tensor_array_write
value=value, flow_in=flow_in, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py",
line 694, in apply_op
op_def=op_def)
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import tensor_array_ops, control_flow_ops
class TreeRNN(object):
def __init__(self, num_emb, emb_dim, output_dim, degree=2, learning_rate=0.01):
self.num_emb = num_emb
self.emb_dim = emb_dim
self.output_dim = output_dim
self.degree= degree
self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim]))
self.recursive_unit = self.create_recursive_unit()
self.W_out = tf.Variable(self.init_matrix([self.output_dim, self.emb_dim]))
self.b_out = tf.Variable(self.init_vector([self.output_dim]))
self.x = tf.placeholder(tf.int32, shape=[None]) # word indices
self.tree = tf.placeholder(tf.int32, shape=[None, self.degree + 1])
self.y = tf.placeholder(tf.float32, shape=[self.output_dim])
num_words, = tf.unpack(tf.shape(self.x), 1) # also num leaves
emb_x = tf.gather(self.embeddings, self.x)
node_emb = tensor_array_ops.TensorArray(
dtype=tf.float32, size=num_words - 1, dynamic_size=True,
clear_after_read=False)
node_emb = node_emb.unpack(emb_x)
num_nodes, _ = tf.unpack(tf.shape(self.tree), 2) # num internal nodes
tree_traversal = tensor_array_ops.TensorArray(
dtype=tf.int32, size=num_nodes)
tree_traversal = tree_traversal.unpack(self.tree)
def _recurrence(t, node_emb, _):
node_info = tree_traversal.read(t)
children_and_parent = tf.unpack(node_info, self.degree + 1)
child_emb = []
for i in xrange(self.degree):
child_emb.append(node_emb.read(children_and_parent[i]))
parent_emb = self.recursive_unit(child_emb)
new_node_emb = node_emb.write(children_and_parent[-1], parent_emb)
return t + 1, new_node_emb, parent_emb
self.time = tf.constant(0, dtype=tf.int32, name='time')
_, _, final_emb = control_flow_ops.While(
cond=lambda t, _1, _2: t < num_nodes,
body=_recurrence,
loop_vars=(self.time, node_emb, tf.zeros([1])))
self.final_state = final_emb
self.pred_y = self.activation(
tf.matmul(self.W_out, tf.reshape(self.final_state, [self.emb_dim, 1]))
+ self.b_out)
self.loss = self.loss_fn(self.y, self.pred_y)
self.params = tf.trainable_variables()
opt = tf.train.GradientDescentOptimizer(self.learning_rate)
self.grad = tf.gradients(self.loss, self.params)
self.updates = opt.apply_gradients(zip(self.grad, self.params))
def init_matrix(self, shape):
return tf.random_normal(shape, stddev=0.1)
def init_vector(self, shape):
return tf.zeros(shape)
def create_recursive_unit(self):
def unit(child_emb): # very simple
return 1 + child_emb[0] * child_emb[1]
return unit
def activation(self, inp):
return tf.sigmoid(inp)
def loss_fn(self, y, pred_y):
return tf.reduce_sum(tf.square(y - pred_y))
model = TreeRNN(8, 1, 1, degree=2)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
root_emb = sess.run([model.final_state],
feed_dict={model.x: np.array([0, 1]), model.tree: np.array([[0, 1, 2]])})
emb, = sess.run([model.embeddings])
assert root_emb == 1 + emb[0] * emb[1]
out = sess.run([model.updates, model.loss],
feed_dict={model.x: np.array([0, 1]),
model.tree: np.array([[0, 1, 2]]),
model.y: np.array([0])})

set parallel_iterations=1 in tf.while_loop

Related

Tensorflow error: Failed to convert object of type <class 'dict'> to Tensor

I am trying to code a neural network which can recognize handwritten digits. I am using the MNIST dataset and the tensor flow library. For now, I am only trying to train the network but it throws a huge error whenever I run it. I am a beginner, so I am very sorry if the code looks bad.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data", one_hot = True)
numNodesH1 = 600
numNodesH2 = 500
numNodesH3 = 500
numNodesOut = 10
sizeOfBatch = 150
y = tf.placeholder("float")
x = tf.placeholder("float", [None, 784])
def neuralNetwork(value):
H1 = {'weights': tf.Variable(tf.random_normal([784, numNodesH1])),
"biases": tf.Variable(tf.random_normal([numNodesH1]))}
H2 = {'weights': tf.Variable(tf.random_normal([numNodesH1,
numNodesH2])),
"biases": tf.Variable(tf.random_normal([numNodesH2]))}
H3 = {"weights": tf.Variable(tf.random_normal([numNodesH2,
numNodesH3])),
"biases": tf.Variable(tf.random_normal([numNodesH3]))}
output = {"weights": tf.Variable(tf.random_normal([numNodesH3,
numNodesOut])),
"biases": tf.Variable(tf.random_normal([numNodesOut]))}
FinalH1 = tf.add(tf.matmul(value, H1["weights"]), H1["biases"])
FinalH1 = tf.nn.relu(FinalH1)
FinalH2 = tf.add(tf.matmul(H1, H2["weights"]), H2["biases"])
FinalH2 = tf.nn.relu(FinalH2)
FinalH3 = tf.add(tf.matmul(H2, H3["weights"]), H3["biases"])
FinalH3 = tf.nn.relu(FinalH3)
FinalOut = tf.matmul(H3, output["weights"]) + output["biases"]
return FinalOut
def train(inputdata):
prediction = neuralNetwork(inputdata)
cost=tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizingTool = tf.train.AdamOptimizer().minimize(cost)
epochsNum = 10
with tf.Session as sess:
sess.run(tf.global_variables_initializer())
for i in range(epochsNum):
lostEpochs = 0
for o in range(int(mnist.train.num_examples / sizeOfBatch)):
ex, ey = mnist.train.next_batch(sizeOfBatch)
_, c = sess.run([optimizer, cost], feed_dict = {x: ex, y:
ey})
lostEpochs = lostEpochs + c
print("Epochs completed = ", i, " / ", epochsNum, " epoch loss =
", lostEpochs)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
neuralAccuracy = tf.reduce_mean(tf.cast(correct, "float"))
print(neuralAccuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
train(x)
Every time I run this code, it gives me this error:
Traceback (most recent call last):
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\tensor_util.py", line 468, in
make_tensor_proto
str_values = [compat.as_bytes(x) for x in proto_values]
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\tensor_util.py", line 468, in
<listcomp>
str_values = [compat.as_bytes(x) for x in proto_values]
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\util\compat.py", line 65, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got {'weights': <tf.Variable
'Variable:0' shape=(784, 600) dtype=float32_ref>, 'biases': <tf.Variable
'Variable_1:0' shape=(600,) dtype=float32_ref>}
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Msi-
\AppData\Local\Programs\Python\Python36\neuralnetworktest.py", line 45, in
<module>
train(x)
File "C:\Users\Msi-
\AppData\Local\Programs\Python\Python36\neuralnetworktest.py", line 29, in
train
prediction = neuralNetwork(inputdata)
File "C:\Users\Msi-
\AppData\Local\Programs\Python\Python36\neuralnetworktest.py", line 22, in
neuralNetwork
FinalH2 = tf.add(tf.matmul(H1, H2["weights"]), H2["biases"])
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\ops\math_ops.py", line 1844, in matmul
a = ops.convert_to_tensor(a, name="a")
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\ops.py", line 836, in convert_to_tensor
as_ref=False)
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\ops.py", line 926, in
internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\constant_op.py", line 229, in
_constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\constant_op.py", line 208, in constant
value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\tensor_util.py", line 472, in
make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'dict'> to Tensor.
Contents:
{'weights': <tf.Variable 'Variable:0' shape=(784, 600) dtype=float32_ref>,
'biases': <tf.Variable 'Variable_1:0' shape=(600,) dtype=float32_ref>}.
Consider
casting elements to a supported type.
I think you meant
FinalH1 = tf.add(tf.matmul(value, H1["weights"]), H1["biases"])
FinalH1 = tf.nn.relu(FinalH1)
FinalH2 = tf.add(tf.matmul(FinalH1, H2["weights"]), H2["biases"])
FinalH2 = tf.nn.relu(FinalH2)
FinalH3 = tf.add(tf.matmul(FinalH2, H3["weights"]), H3["biases"])
FinalH3 = tf.nn.relu(FinalH3)
FinalOut = tf.matmul(FinalH3, output["weights"]) + output["biases"]
Note FinalH1 instead of H1 and that same for H2 and H3.

Tensorflow: value error with variable_scope in LSTM

This is my code in tensorflow to train a GAN. I am training des to able to distinguish between fake and original video. I have important not relevant part of code to avoid stack over flow mostly code error
X = tf.placeholder(tf.float32, shape=[None, 28, 28])
D_W1 = tf.Variable(xavier_init([1024, 128]))
D_b1 = tf.Variable(tf.zeros(shape=[128]))
D_W2 = tf.Variable(xavier_init([128, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))
theta_D = [D_W1, D_W2, D_b1, D_b2]
rnn_size = 1024
rnn_layer = 2
Z = tf.placeholder(tf.float32, shape=[None, 100])
G_W1 = tf.Variable(xavier_init([100, 128]))
G_b1 = tf.Variable(tf.zeros(shape=[128]))
G_W2 = tf.Variable(xavier_init([128, 784]))
G_b2 = tf.Variable(tf.zeros(shape=[784]))
theta_G = [G_W1, G_W2, G_b1, G_b2]
def sample_Z(m, n):
return np.random.uniform(-1., 1., size=[m, n])
def generator(z):
G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
G_prob = tf.nn.sigmoid(G_log_prob)
G_prob = tf.reshape(G_prob, [-1,28, 28])
return G_prob
def discriminator(x):
x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
# with tf.variable_scope('cell_def'):
stacked_rnn1 = []
for iiLyr1 in range(rnn_layer):
stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True))
lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
# with tf.variable_scope('rnn_def'):
dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
lstm_multi_fw_cell, x, dtype=tf.float32)
D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
D_logit = tf.matmul(D_h1, D_W2) + D_b2
D_prob = tf.nn.sigmoid(D_logit)
return D_prob, D_logit
G_sample = generator(Z)
print(G_sample.get_shape())
print(X.get_shape())
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)
D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
G_loss = -tf.reduce_mean(tf.log(D_fake))
summary_d = tf.summary.histogram('D_loss histogram', D_loss)
summary_g = tf.summary.histogram('D_loss histogram', G_loss)
summary_s = tf.summary.scalar('D_loss scalar', D_loss)
summary_s1 = tf.summary.scalar('scalar scalar', G_loss)
# Add image summary
summary_op = tf.summary.image("plot", image)
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
mb_size = 128
Z_dim = 100
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
# merged_summary_op = tf.summary.merge_all()
sess = tf.Session()
saver = tf.train.Saver()
writer1 = tf.summary.FileWriter('log/log-sample1', sess.graph)
writer2 = tf.summary.FileWriter('log/log-sample2', sess.graph)
sess.run(tf.global_variables_initializer())
if not os.path.exists('out/'):
os.makedirs('out/')
i = 0
with tf.variable_scope("myrnn") as scope:
for it in range(5000):
X_mb, _ = mnist.train.next_batch(mb_size)
X_mb = tf.reshape(X_mb, [mb_size, -1, 28])
_, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
_, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})
summary_str, eded = sess.run([summary_d, summary_s], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
writer1.add_summary(summary_str, it)
writer1.add_summary(eded, it)
summary_str1, eded1 = sess.run([summary_g, summary_s1], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
writer2.add_summary(summary_str1, it)
writer2.add_summary(eded1, it)
if it % 1000 == 0:
print('Iter: {}'.format(it))
print('D loss: {:.4}'. format(D_loss_curr))
print('G_loss: {:.4}'.format(G_loss_curr))
print()
save_path = saver.save(sess, "tmp/model.ckpt")
writer1.close()
writer2.close()
`
Following is the error when I run this code please help.
Traceback (most recent call last):
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 104, in <module>
D_fake, D_logit_fake = discriminator(G_sample)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1212, in static_rnn
(output, state) = call_cell()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 664, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 103, in <module>
D_real, D_logit_real = discriminator(X)
It is GAN. I am using MNIST data to train generator and discriminator.
Add a reuse parameter to the BasicLSTMCell. Since you are calling the discriminator function twice and calling reuse=None, both the times, it throws the errors when try to create variables with same name. In this context you need to reuse the variables from the graph for the second call; as you don't need to create new set of variables.
def discriminator(x, reuse):
x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
# with tf.variable_scope('cell_def'):
stacked_rnn1 = []
for iiLyr1 in range(rnn_layer):
stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True, reuse=reuse))
lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
# with tf.variable_scope('rnn_def'):
dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
lstm_multi_fw_cell, x, dtype=tf.float32)
D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
D_logit = tf.matmul(D_h1, D_W2) + D_b2
D_prob = tf.nn.sigmoid(D_logit)
return D_prob, D_logit
....
D_real, D_logit_real = discriminator(X, None)
D_fake, D_logit_fake = discriminator(G_sample, True)
....

Trying to add CNN to an MLP Siamese

I'm getting an incompatible shape error when trying trying to add a CNN to a ready siamese code that I got from github : here is the link :
https://github.com/ywpkwon/siamese_tf_mnist
here is the code for running the session:
""" Siamese implementation using Tensorflow with MNIST example.
This siamese network embeds a 28x28 image (a point in 784D)
into a point in 2D.
By Youngwook Paul Kwon (young at berkeley.edu)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#import system things
from tensorflow.examples.tutorials.mnist import input_data # for data
import tensorflow as tf
import numpy as np
import os
#import helpers
import inference
import visualize
# prepare data and tf.session
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
sess = tf.InteractiveSession()
# setup siamese network
siamese = inference.siamese();
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(siamese.loss)
saver = tf.train.Saver()
tf.initialize_all_variables().run()
# start training
if new:
for step in range(1000):
batch_x1, batch_y1 = mnist.train.next_batch(128)
batch_x2, batch_y2 = mnist.train.next_batch(128)
batch_y = (batch_y1 == batch_y2).astype('float')
_, loss_v = sess.run([train_step, siamese.loss], feed_dict={
siamese.x1: batch_x1,
siamese.x2: batch_x2,
siamese.y_: batch_y})
if step % 10 == 0:
print ('step %d: loss' % (step))
print (loss_v)
here is the code for creating the Siamese model.
import tensorflow as tf
class siamese:
# Create model
def __init__(self):
self.x1 = tf.placeholder(tf.float32, [None, 784])
self.x2 = tf.placeholder(tf.float32, [None, 784])
with tf.variable_scope("siamese") as scope:
self.o1 = self.network(self.x1)
scope.reuse_variables()
self.o2 = self.network(self.x2)
# Create loss
self.y_ = tf.placeholder(tf.float32, [None])
self.loss = self.loss_with_step()
def network(self, x):
weights = []
fc1 = self.fc_layer(x, 1024, "fc1" , [5, 5, 1, 32])
return fc1
def fc_layer(self, bottom, n_weight, name,kernel_shape ): #[5, 5, 1, 32]
assert len(bottom.get_shape()) == 2
#n_prev_weight = bottom.get_shape()[1]
initer = tf.truncated_normal_initializer(stddev=0.01)
weights_for_convolution = tf.get_variable(name+"weights_for_convolution", kernel_shape,
initializer=tf.random_normal_initializer())
bias_shape = kernel_shape[-1]
biases_for_convolution = tf.get_variable(name+"biases_for_convolution", [bias_shape],
initializer=tf.constant_initializer(0.1))
biases_for_connected_layer = tf.get_variable(name+"biases_for_connected_layer", [1024],
initializer=tf.constant_initializer(0.1))
weights_for_connected_layer = tf.get_variable(name+"weights_for_connected_layer", [7*7*64,1024],
initializer=tf.random_normal_initializer())
W = tf.get_variable(name+'W', dtype=tf.float32, shape=[1024,2], initializer=initer)
b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[2], dtype=tf.float32))
#weights_for_readout_layer = tf.get_variable("weights_for_readout_layer", [1024,2],
#initializer=tf.random_normal_initializer())
#biases_for_readout_layer = tf.get_variable("biases_for_readout_layer", [2],
#initializer=tf.constant_initializer(0.1))
bottom1 = tf.reshape(bottom,[-1,28,28,1]) ##
c2 = tf.nn.conv2d(bottom1, weights_for_convolution, strides=[1, 1, 1, 1], padding='SAME')
conv = tf.nn.bias_add(c2, biases_for_convolution)
relu = tf.nn.relu(conv)
out = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
#print tf.shape(out)
h_out_flat = tf.reshape(out ,[-1,7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)
#compute model output
final_output = tf.matmul(h_fc1,W) + b
#fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
return final_output
def loss_with_spring(self):
margin = 5.0
labels_t = self.y_
labels_f = tf.subtract(1.0, self.y_, name="1-yi") # labels_ = !labels;
eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
print tf.shape(eucd2)
eucd2 = tf.reduce_sum(eucd2, 1)
eucd = tf.sqrt(eucd2+1e-6, name="eucd")
C = tf.constant(margin, name="C")
# yi*||CNN(p1i)-CNN(p2i)||^2 + (1-yi)*max(0, C-||CNN(p1i)-CNN(p2i)||^2)
pos = tf.multiply(labels_t, eucd2, name="yi_x_eucd2")
# neg = tf.multiply(labels_f, tf.subtract(0.0,eucd2), name="yi_x_eucd2")
# neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C,eucd2)), name="Nyi_x_C-eucd_xx_2")
neg = tf.multiply(labels_f, tf.pow(tf.maximum(tf.subtract(C, eucd), 0), 2), name="Nyi_x_C-eucd_xx_2")
losses = tf.add(pos, neg, name="losses")
loss = tf.reduce_mean(losses, name="loss")
return loss
def loss_with_step(self):
margin = 5.0
labels_t = self.y_ #128
labels_f = tf.subtract(1.0, self.y_, name="1-yi") # labels_ = !labels;
eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
eucd2 = tf.reduce_sum(eucd2, 1)
eucd = tf.sqrt(eucd2+1e-6, name="eucd")
C = tf.constant(margin, name="C")
pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd)), name="Ny_C-eucd")
losses = tf.add(pos, neg, name="losses")
loss = tf.reduce_mean(losses, name="loss")
return loss
Actually as the batch size is 128 label-t is 128,
the problem here is that the euclidean distance in the loss_with_step function,
as well as in the loss_with_spring function is of size 256 and not 128 I don't really know why!
here is the error I get.
Traceback (most recent call last):
File "run1.py", line 56, in <module>
siamese.y_: batch_y})
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/ replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
Caused by op u'y_x_eucd', defined at:
File "run1.py", line 28, in <module>
siamese = inference1.siamese();
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master /inference1.py", line 18, in __init__
self.loss = self.loss_with_step()
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master /inference1.py", line 110, in loss_with_step
pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/ops/math_ops.py", line 286, in multiply
return gen_math_ops._mul(x, y, name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/ops/gen_math_ops.py", line 1377, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/op_def_library.py", line 767, in apply
_op
op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site- packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost /replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
can anyone help?
Looks like your reshaping after the convolution is wrong. The output of the convolution layer would be 14x14x32 for a 28x28x1 input passed through conv(stride=1)-maxpool(stride 2). So you need to change the flatten layer to :
h_out_flat = tf.reshape(out ,[-1,14*14*32])
and also the weights_for_connected_layer appropriately.

TensorFlow creating an Ai, error: You must feed a value for placeholder tensor 'input_1/X'

I am currently working on an AI for openai, I am trying to pass random data collected to make a model of a neural network, then use that model to create new data. When I try to make another model using the new trained data it wont let e create a new model and gives an
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]].
my code:
import gym
import random
import numpy as np
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import matplotlib.pyplot as plt
env = gym.make("CartPole-v1")
env.reset()#restarts the enviroment
epoch = 5
LR = 2e-4
max_score = 500
number_of_training_games = 100
generations = 3
training_scores = []
random_gen_score = []
def create_random_training_data():
x = 0
accepted_training_data = []
scores_and_data = []
array_of_scores = []
for i in range(number_of_training_games):
score = 0
prev_observation = []
training_data = []
for _ in range(max_score):
action = env.action_space.sample()
observation, reward, done, info = env.step(action)
if len(prev_observation) > 0:
training_data.append([prev_observation, action])
prev_observation = observation
score += reward
if done:
array_of_scores.append(score)
break
for i in training_data:
scores_and_data.append([score,i[0],i[1]])
# reset enviroment
env.reset()
training_scores = array_of_scores
for data in scores_and_data:
if data[0] > median(array_of_scores):
if data[2] == 1:
output = [0,1]
elif data[2] == 0:
output = [1,0]
accepted_training_data.append([data[1], output])
return accepted_training_data
def training_model(sample_data):
inputs = np.array([i[0] for i in sample_data]).reshape(-1,4,1)
correct_output = [i[1] for i in sample_data]
model = neural_network(input_size = len(inputs[0]))
model.fit({'input': inputs}, {'targets': correct_output}, n_epoch=epoch , snapshot_step=500, show_metric=True, run_id='openai_learning')
print(input)
return model
def neural_network(input_size):
# this is where our observation data will go
network = input_data(shape=[None, input_size, 1], name = 'input')
# our neural networks
network = fully_connected(network, 128, activation = 'relu')
#dropout is used to drop randon nodes inorder to reduce over training
network = dropout(network, 0.8)
network = fully_connected(network, 256, activation = 'relu')
network = dropout(network, 0.8)
network = fully_connected(network, 512, activation = 'relu')
network = dropout(network, 0.8)
network = fully_connected(network, 256, activation = 'relu')
network = dropout(network, 0.8)
network = fully_connected(network, 128, activation = 'relu')
network = dropout(network, 0.8)
# this is the output
network = fully_connected(network, 2, activation = 'softmax')
network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
model = tflearn.DNN(network, tensorboard_dir='log')
return model
def run_generation():
random_sample_data = []
trained_data = []
for i in range(generations):
if len(random_sample_data) ==0:
random_sample_data = create_random_training_data()
model1 = training_model(random_sample_data)
else:
trained_data = one_generation(model1)
model2 = training_model(trained_data)
return model2
def one_generation(model):
accepted_training_data = []
scores_and_data = []
array_of_scores = []
for i in range(number_of_training_games):
score = 0
prev_observation = []
training_data = []
for _ in range(max_score):
if len(prev_observation) == 0:
action = env.action_space.sample()
else:
action = np.argmax(model.predict(prev_observation.reshape(-1,len(prev_observation),1))[0])
observation, reward, done, info = env.step(action)
if len(prev_observation) > 0:
training_data.append([prev_observation, action])
prev_observation = observation
score += reward
if done:
array_of_scores.append(score)
break
for i in training_data:
scores_and_data.append([score,i[0],i[1]])
# reset enviroment
env.reset()
for data in scores_and_data:
if data[0] > median(array_of_scores):
if data[2] == 1:
output = [0,1]
elif data[2] == 0:
output = [1,0]
accepted_training_data.append([data[1], output])
return accepted_training_data
def testing():
scores = []
model = run_generation()
for _ in range(100):
score = 0
game_memory = []
prev_obs = []
env.reset()
for _ in range(max_score):
env.render()
#first move is going to be random
if len(prev_obs)==0:
action = random.randrange(0,2)
else:
action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])
#records actions
new_observation, reward, done, info = env.step(action)
prev_obs = new_observation
game_memory.append([new_observation, action])
score+=reward
if done: break
scores.append(score)
#print('Average training Score:',sum(training_scores)/len(training_scores))
print('Average Score:',sum(scores)/len(scores))
print (scores)
testing()
error:
Traceback (most recent call last):
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/anaconda/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 259, in <module>
testing()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 219, in testing
model = run_generation()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 148, in run_generation
model2 = training_model(trained_data)
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 92, in training_model
model.fit({'input': inputs}, {'targets': correct_output}, n_epoch=epoch , snapshot_step=500, show_metric=True, run_id='openai_learning')
File "/anaconda/lib/python3.6/site-packages/tflearn/models/dnn.py", line 215, in fit
callbacks=callbacks)
File "/anaconda/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 336, in fit
show_metric)
File "/anaconda/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 777, in _train
feed_batch)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'input_1/X', defined at:
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 259, in <module>
testing()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 219, in testing
model = run_generation()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 148, in run_generation
model2 = training_model(trained_data)
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 90, in training_model
model = neural_network(input_size = len(inputs[0]))
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 100, in neural_network
network = input_data(shape=[None, input_size, 1], name = 'input')
File "/anaconda/lib/python3.6/site-packages/tflearn/layers/core.py", line 81, in input_data
placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1507, in placeholder
name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1997, in _placeholder
name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
this line:
network = input_data(shape=[None, input_size, 1], name = 'input')
should be
network = input_data(shape=[None, input_size,input_size , 1], name = 'input')
there should be 4 arguments first is taken as place holder.
Try this.

MomentumOptimizer error: Attempting to use uninitialized value Variable_2/Momentum

I'm learning TensorFlow. I was trying tf.train.MomentumOptimizer but I got the following error:
Traceback (most recent call last):
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 70, in run
self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:#Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
Caused by op u'Momentum/update_Variable_2/ApplyMomentum', defined at:
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 55, in run
train_step = self.optimizer.minimize(self.cross_entropy)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 289, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 413, in apply_gradients
update_ops.append(processor.update_op(self, grad))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 61, in update_op
return optimizer._apply_dense(g, self._v) # pylint: disable=protected-access
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/momentum.py", line 69, in _apply_dense
use_nesterov=self._use_nesterov).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 348, in apply_momentum
use_nesterov=use_nesterov, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:#Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
And following is my code:
import time
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
class ReluMnistNet:
def __init__(self, optimizer=None):
self.varlist = []
self.optimizer = optimizer or tf.train.GradientDescentOptimizer(0.01)
# fetch dataset
self.mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# prepare environment
layers = [ 100 ]
input_layer = 784
output_layer = 10
self.x = tf.placeholder(tf.float32, [None, input_layer])
last_layer = input_layer
y = self.x
for layer in layers:
b = tf.Variable(tf.zeros([layer]))
self.varlist.append(b)
W = tf.Variable(tf.random_normal([last_layer,layer], stddev=0.01))
self.varlist.append(W)
y = tf.nn.relu( tf.matmul(y,W) ) + b
last_layer = layer
b = tf.Variable(tf.zeros([output_layer]))
self.varlist.append(b)
W = tf.Variable(tf.random_normal([last_layer,output_layer], stddev=0.01))
self.varlist.append(W)
self.y = tf.matmul(y,W) + b
self.y_ = tf.placeholder(tf.float32, [None, 10])
self.cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_) )
def prepare(self):
# init = tf.initialize_variables(self.varlist)
init = tf.initialize_all_variables()
self.sess = tf.Session()
self.sess.run(init)
def run(self, batch_size=100, stop=0.001, print_epoch=False):
mnist = self.mnist
data_size = mnist.train.images.shape[0]
last_accuracy = 0
accuracy_history = []
train_step = self.optimizer.minimize(self.cross_entropy)
time1 = time.time()
for i in range(10000):
for j in range(data_size/batch_size):
# random batch
batch_idx = np.arange(data_size)
np.random.shuffle(batch_idx)
batch_idx = batch_idx[0:batch_size]
batch_xs = mnist.train.images[batch_idx]
batch_ys = mnist.train.labels[batch_idx]
# ordered batch
# start = j * batch_size
# end = (j+1) * batch_size
# batch_xs, batch_ys = mnist.train.images[start:end], mnist.train.labels[start:end]
self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
# test the accuracy
correct_prediction = tf.equal( tf.argmax(self.y,1), tf.argmax(self.y_,1) )
accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32) )
accuracy = self.sess.run(accuracy, feed_dict = {self.x: mnist.test.images, self.y_: mnist.test.labels})
accuracy_history.append(accuracy)
if print_epoch:
print i, accuracy
if last_accuracy != 0 and abs(last_accuracy-accuracy) < stop:
break
last_accuracy = accuracy
time2 = time.time()
return accuracy_history, (time2-time1)
def close(self):
if not (self.sess is None):
self.sess.close()
self.sess = None
if __name__ == '__main__':
learner = ReluMnistNet()
# learner.optimizer = tf.train.GradientDescentOptimizer(0.01)
learner.optimizer = tf.train.MomentumOptimizer(0.01, momentum=0.9)
for i in range(10):
learner.prepare()
learner.run(stop=0.01, print_epoch=True)
learner.close()
It seems like a variable named Momentum is uninitialized? However, by calling learner.prepare(), I have called tf.initialize_all_variables(). Even more, I have no variable named Momentum. Why does this happens?
In your code you are calling minimize after initializing global variables
instead you have to do:
self.cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_) )
self.optimize = self.optimizer.minimize(self.cross_entropy)
and in run function instead of
train_step = self.optimizer.minimize(self.cross_entropy)
you should call
train_step = self.optimize
P.S
Momentun is the default name for the MomentumOptimizer