This is my TensorFlow code for training a GAN. I am training the discriminator to be able to distinguish between fake and original video. I have omitted the parts of the code that are not relevant, to avoid Stack Overflow's "mostly code" error.
# Placeholder for real samples fed to the discriminator: batches of 28x28 float32 arrays.
X = tf.placeholder(tf.float32, shape=[None, 28, 28])
# Dense head applied to the last LSTM output in discriminator(): 1024 -> 128 -> 1.
D_W1 = tf.Variable(xavier_init([1024, 128]))
D_b1 = tf.Variable(tf.zeros(shape=[128]))
D_W2 = tf.Variable(xavier_init([128, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))
# NOTE(review): only the dense-head variables are listed here; the LSTM
# variables created inside discriminator() are not part of theta_D, so an
# optimizer restricted to var_list=theta_D will not update them.
theta_D = [D_W1, D_W2, D_b1, D_b2]
# LSTM width (matches the first dimension of D_W1) and number of stacked layers.
rnn_size = 1024
rnn_layer = 2
# Placeholder for generator noise: batches of 100-dimensional float32 vectors.
Z = tf.placeholder(tf.float32, shape=[None, 100])
# Generator weights: 100 -> 128 -> 784 (784 = 28 * 28, reshaped in generator()).
G_W1 = tf.Variable(xavier_init([100, 128]))
G_b1 = tf.Variable(tf.zeros(shape=[128]))
G_W2 = tf.Variable(xavier_init([128, 784]))
G_b2 = tf.Variable(tf.zeros(shape=[784]))
theta_G = [G_W1, G_W2, G_b1, G_b2]
def sample_Z(m, n):
    """Draw a batch of uniform noise for the generator.

    Args:
        m: Number of rows (samples) to draw.
        n: Number of columns (noise dimensions) per sample.

    Returns:
        A NumPy array of shape (m, n) whose entries are drawn uniformly
        from the half-open interval [-1, 1).
    """
    return np.random.uniform(-1., 1., size=[m, n])
def generator(z):
    """Map noise vectors to 28x28 outputs with a two-layer perceptron.

    Uses the module-level generator variables ``G_W1``, ``G_b1``, ``G_W2``
    and ``G_b2``.

    Args:
        z: Float tensor of noise vectors; its last dimension must match the
            first dimension of ``G_W1``.

    Returns:
        Tensor reshaped to [-1, 28, 28] holding sigmoid activations.
    """
    G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob)
    # The flat 784-value rows are reshaped so the output has the same
    # [batch, 28, 28] layout as the X placeholder fed to the discriminator.
    return tf.reshape(G_prob, [-1, 28, 28])
# Score a batch of 28x28 inputs: a stacked LSTM reads the input as a sequence
# of 28 steps, and a two-layer dense head turns the last LSTM output into one
# logit per sample. Returns (sigmoid probability, raw logit).
# NOTE(review): every call builds new LSTM cells under the same default scope
# names, so calling this function a second time raises the
# "Variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel already exists"
# ValueError shown in the traceback below.
def discriminator(x):
# Split along axis 1 into 28 pieces and drop that axis: a list of 28 step tensors.
x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
# with tf.variable_scope('cell_def'):
stacked_rnn1 = []
for iiLyr1 in range(rnn_layer):
stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True))
lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
# with tf.variable_scope('rnn_def'):
dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
lstm_multi_fw_cell, x, dtype=tf.float32)
# Only the output of the final time step feeds the dense head.
D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
D_logit = tf.matmul(D_h1, D_W2) + D_b2
D_prob = tf.nn.sigmoid(D_logit)
return D_prob, D_logit
# Build the GAN graph: generator output, discriminator scores, losses,
# TensorBoard summaries and one Adam optimizer per network.
G_sample = generator(Z)
print(G_sample.get_shape())
print(X.get_shape())
# NOTE(review): discriminator() is called twice here; the second call is the
# one that fails with the "Variable ... already exists" error in the traceback
# below unless the LSTM variables are reused.
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)
D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
G_loss = -tf.reduce_mean(tf.log(D_fake))
# Each summary gets a tag naming the loss it actually records (the generator
# summaries were previously tagged 'D_loss histogram' / 'scalar scalar'), and
# tags avoid spaces.
summary_d = tf.summary.histogram('D_loss_histogram', D_loss)
summary_g = tf.summary.histogram('G_loss_histogram', G_loss)
summary_s = tf.summary.scalar('D_loss_scalar', D_loss)
summary_s1 = tf.summary.scalar('G_loss_scalar', G_loss)
# Add image summary
# NOTE(review): `image` is not defined in the visible code; presumably it
# comes from the part of the script that was omitted -- confirm.
summary_op = tf.summary.image("plot", image)
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
# Session setup and training loop: alternate one discriminator step and one
# generator step per iteration, logging loss summaries for each.
mb_size = 128
Z_dim = 100
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
# merged_summary_op = tf.summary.merge_all()
sess = tf.Session()
saver = tf.train.Saver()
writer1 = tf.summary.FileWriter('log/log-sample1', sess.graph)
writer2 = tf.summary.FileWriter('log/log-sample2', sess.graph)
sess.run(tf.global_variables_initializer())
os.makedirs('out/', exist_ok=True)
i = 0
with tf.variable_scope("myrnn") as scope:
    for it in range(5000):
        X_mb, _ = mnist.train.next_batch(mb_size)
        # Reshape with NumPy, not tf.reshape: feed_dict values must be
        # array-like, and session.run rejects a tf.Tensor as a feed value.
        X_mb = np.reshape(X_mb, [mb_size, -1, 28])
        _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
        _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})
        summary_str, eded = sess.run([summary_d, summary_s], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
        writer1.add_summary(summary_str, it)
        writer1.add_summary(eded, it)
        summary_str1, eded1 = sess.run([summary_g, summary_s1], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
        writer2.add_summary(summary_str1, it)
        writer2.add_summary(eded1, it)
        if it % 1000 == 0:
            print('Iter: {}'.format(it))
            print('D loss: {:.4}'. format(D_loss_curr))
            print('G_loss: {:.4}'.format(G_loss_curr))
            print()
# NOTE(review): the original indentation was lost; saving once after the loop
# is assumed here -- confirm it was not meant to run inside the loop.
save_path = saver.save(sess, "tmp/model.ckpt")
writer1.close()
writer2.close()
`
The following is the error I get when I run this code. Please help.
Traceback (most recent call last):
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 104, in <module>
D_fake, D_logit_fake = discriminator(G_sample)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1212, in static_rnn
(output, state) = call_cell()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 664, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 103, in <module>
D_real, D_logit_real = discriminator(X)
It is a GAN. I am using MNIST data to train the generator and the discriminator.
Add a reuse parameter to the BasicLSTMCell. Since you call the discriminator function twice with reuse=None both times, it throws an error when it tries to create variables with the same names. In this context you need to reuse the variables from the graph for the second call, as you don't need to create a new set of variables.
def discriminator(x, reuse=None):
    """Score a batch of 28x28 inputs with a stacked LSTM and a dense head.

    Args:
        x: Float tensor that is split into 28 pieces along axis 1; each
            piece is one time step of the LSTM input sequence.
        reuse: Forwarded to every ``BasicLSTMCell``. Pass ``None`` on the
            first call (variables are created) and ``True`` on later calls
            so the existing LSTM variables are reused instead of recreated.

    Returns:
        Tuple ``(D_prob, D_logit)``: the sigmoid probability and the raw
        logit computed from the LSTM output of the last time step.
    """
    # Split along axis 1 and drop that axis: a list of 28 step tensors.
    x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
    # with tf.variable_scope('cell_def'):
    stacked_rnn1 = []
    for iiLyr1 in range(rnn_layer):
        stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True, reuse=reuse))
    lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
    # with tf.variable_scope('rnn_def'):
    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
        lstm_multi_fw_cell, x, dtype=tf.float32)
    # Only the output of the final time step feeds the dense head.
    D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
    D_logit = tf.matmul(D_h1, D_W2) + D_b2
    D_prob = tf.nn.sigmoid(D_logit)
    return D_prob, D_logit
....
D_real, D_logit_real = discriminator(X, None)
D_fake, D_logit_fake = discriminator(G_sample, True)
....
Related
I am using TensorFlow 1.7 with dynamic_rnn. It runs fine at first, but at the 32nd step (the exact step changes from run to run) the error appears. When I use a smaller batch size the code seems to run longer, but the error still pops up. I just can't figure out what's wrong.
from mapping import *
# Build a tf.data pipeline over (features, targets, sequece_lenth) and return
# the tensors for the next (features, labels, sequence) batch.
# NOTE(review): batch() runs before repeat() and nothing drops the remainder,
# so the final batch of each pass can be smaller than batch_size -- this
# matches the batch-dimension mismatch reported in the traceback below.
# NOTE(review): shuffle() is applied after batch(), so it reorders whole
# batches rather than individual examples -- confirm this is intended.
def my_input_fn(features, targets, batch_size=20, shuffle=True, num_epochs=None, sequece_lenth=None):
ds = tf.data.Dataset.from_tensor_slices(
(features, targets, sequece_lenth)) # warning: 2GB limit
ds = ds.batch(batch_size).repeat(num_epochs)
if shuffle:
ds = ds.shuffle(10000)
features, labels, sequence = ds.make_one_shot_iterator().get_next()
return features, labels, sequence
def lstm_cell(lstm_size=50):
    """Create one ``BasicLSTMCell`` with ``lstm_size`` hidden units."""
    return tf.contrib.rnn.BasicLSTMCell(lstm_size)
class RnnModel:
    """Stacked-LSTM model built from ``tf.contrib.rnn.MultiRNNCell``."""

    def __init__(self,
                 batch_size,
                 hidden_units,
                 time_steps,
                 num_features
                 ):
        """Create the stacked cell and its zero initial state.

        Args:
            batch_size: Batch size used for the zero state and the loss.
            hidden_units: Iterable of layer widths; one LSTM cell per entry.
            time_steps: Number of time steps, used when reshaping outputs.
            num_features: Stored on the instance; not used by this class.
        """
        self.batch_size = batch_size
        self.hidden_units = hidden_units
        stacked_lstm = tf.contrib.rnn.MultiRNNCell(
            [lstm_cell(i) for i in self.hidden_units])
        self.initial_state = stacked_lstm.zero_state(batch_size, tf.float32)
        self.model = stacked_lstm
        self.state = self.initial_state
        self.time_steps = time_steps
        self.num_features = num_features

    def loss_mean_squre(self, outputs, targets):
        """Return the mean squared error between ``(outputs + 1) / 2`` and ``targets``."""
        pos = tf.add(outputs, tf.ones(self.batch_size))
        eve = tf.div(pos, 2)
        error = tf.subtract(eve,
                            targets)
        return tf.reduce_mean(tf.square(error))

    def train(self,
              num_steps,
              learningRate,
              input_fn,
              inputs,
              targets,
              sequenceLenth):
        """Build the training graph and run ``num_steps`` optimizer steps.

        Args:
            num_steps: Number of training steps to run.
            learningRate: Learning rate for the Adam optimizer.
            input_fn: Callable returning ``(features, labels, sequence)``
                tensors; called with ``inputs``, ``targets``, the batch
                size, ``shuffle=True`` and ``sequece_lenth=sequenceLenth``.
            inputs: Feature data forwarded to ``input_fn``.
            targets: Target data forwarded to ``input_fn``.
            sequenceLenth: Sequence lengths forwarded to ``input_fn``.

        Returns:
            Tuple ``(self.model, self.state)``.
        """
        periods = 10
        # At least 1 so the modulo below cannot divide by zero when
        # num_steps < periods.
        step_per_periods = max(1, int(num_steps / periods))
        features, target, sequence = input_fn(inputs, targets, self.batch_size, shuffle=True, sequece_lenth=sequenceLenth)
        # NOTE(review): the zero state has a fixed batch dimension, so any
        # batch from input_fn with a different size fails inside the cell
        # with a ConcatOp dimension mismatch.
        initial_state = self.model.zero_state(self.batch_size, tf.float32)
        outputs, state = tf.nn.dynamic_rnn(self.model, features, initial_state=initial_state)
        # NOTE(review): this reshape assumes `outputs` holds exactly
        # time_steps * batch_size values and that row -1 is the last time
        # step; dynamic_rnn is called without time_major here -- confirm the
        # layout is what is intended.
        loss = self.loss_mean_squre(tf.reshape(outputs, [self.time_steps, self.batch_size])[-1], target)
        optimizer = tf.train.AdamOptimizer(learning_rate=learningRate)
        grads_and_vars = optimizer.compute_gradients(loss, self.model.variables)
        # Keep the update op: building it without running it trains nothing.
        train_op = optimizer.apply_gradients(grads_and_vars)
        init_op = tf.global_variables_initializer()
        with tf.Session() as sess:
            # Initialize once; re-running the initializer every step would
            # reset the weights and discard all progress.
            sess.run(init_op)
            for i in range(num_steps):
                _, state2, current_loss = sess.run([train_op, state, loss])
                if i % step_per_periods == 0:
                    print("period " + str(int(i / step_per_periods)) + ":" + str(current_loss))
        return self.model, self.state
def processFeature(df):
    """Convert the ``vecs`` column of ``df`` into nested Python lists.

    Args:
        df: DataFrame with a ``class`` column and a ``vecs`` column whose
            cells are iterables of iterables.

    Returns:
        A list with one entry per row; each entry is a list containing
        ``list(k)`` for every element ``k`` of that row's ``vecs`` cell.

    Raises:
        KeyError: If ``df`` has no ``class`` or no ``vecs`` column.
    """
    # Keyword form: DataFrame.drop no longer accepts the axis positionally
    # in pandas 2.x.
    df = df.drop(columns='class')
    # Iterate the values directly instead of indexing df["vecs"][i], which is
    # a label lookup and fails when the index is not 0..n-1.
    return [[list(k) for k in vec] for vec in df["vecs"]]
def processTargets(df):
    """Return the ``class`` column of ``df`` as a float tensor.

    Args:
        df: DataFrame with a ``class`` column convertible to float.

    Returns:
        The result of ``tf.convert_to_tensor`` on the column's values as a
        list of Python floats.
    """
    # astype() already returns a new Series, so no explicit copy is needed
    # to leave the caller's DataFrame untouched.
    class_values = df["class"].astype(float).tolist()
    return tf.convert_to_tensor(class_values)
# Script entry point: build an RnnModel and train it.
# NOTE(review): training_examples, training_targets and trainSequenceL are not
# defined in the visible code; presumably they come from the omitted
# data-preparation step described in the string below.
if __name__ == '__main__':
# Passed to RnnModel as time_steps.
dividNumber = 30
"""
some code here to modify my data to input
it looks like this:
inputs before use input function : [fullLenth, charactorLenth, embeddinglenth]
"""
# batch_size=15, four stacked LSTM layers with 100, 80, 80 and 1 units.
model = RnnModel(15, [100, 80, 80, 1], time_steps=dividNumber, num_features=25)
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
And the error is shown below:
Traceback (most recent call last):
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1330, in _do_call
return fn(*args)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1315, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1423, in _call_tf_sessionrun
status, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 516, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 124, in train
state2, current_loss, nowAccuracy = sess.run([state, loss, accuracy])
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 908, in run
run_metadata_ptr)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1143, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1324, in _do_run
run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1343, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
Caused by op 'rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat', defined at:
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 95, in train
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)#,sequence_length=sequence
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 627, in dynamic_rnn
dtype=dtype)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 824, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3205, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2943, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2880, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3181, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 795, in _time_step
(output, new_state) = call_cell()
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 781, in <lambda>
call_cell = lambda: cell(input_t, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1283, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 620, in call
array_ops.concat([inputs, h], 1), self._kernel)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1181, in concat
return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1101, in concat_v2
"ConcatV2", values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 3309, in create_op
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1669, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
This is the code I used to check my input:
# Sanity-check the input pipeline: pull 1000 batches from my_input_fn and
# assert that each has batch_size examples of 30 rows with 25 values each,
# plus batch_size targets and sequence lengths.
# NOTE(review): the first assert fails on any batch smaller than batch_size.
def checkData(inputs, targets, sequencelence):
batch_size = 20
features, target, sequece = my_input_fn(inputs, targets, batch_size=batch_size, shuffle=True, num_epochs=None,
sequece_lenth=sequencelence)
with tf.Session() as sess:
for i in range(1000):
features1, target1, sequece1 = sess.run([features, target, sequece])
assert len(features1) == batch_size
for sentence in features1 :
assert len(sentence) == 30
for word in sentence:
assert len(word) == 25
assert len(target1) == batch_size
assert len(sequece1) == batch_size
print(target1)
print("OK")
The error is coming from the LSTMCell.call method. There we call tf.concat([inputs, h], 1), meaning that we want to concatenate the next input with the current hidden state before multiplying (matmul) by the kernel variable matrix. The error says that this cannot be done because the batch (0th) dimensions do not match: your input is shaped [20,25] and your hidden state is shaped [30,100].
For some reason, on your 32nd iteration (or whenever you see the error), the input is batched to only 20 rather than 30. This usually happens at the end of your training data, when the total number of training examples is not evenly divisible by your batch size. This hypothesis is also consistent with your observation that the code runs longer when you use a smaller batch.
I had the same issue. When I corrected the image input size to match the input shape, it ran without errors.
I am trying to code a neural network that can recognize handwritten digits. I am using the MNIST dataset and the TensorFlow library. For now, I am only trying to train the network, but it throws a huge error whenever I run it. I am a beginner, so I am very sorry if the code looks bad.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data", one_hot = True)
# Widths of the three hidden layers and of the output layer built in
# neuralNetwork().
numNodesH1 = 600
numNodesH2 = 500
numNodesH3 = 500
numNodesOut = 10
# Mini-batch size used by train().
sizeOfBatch = 150
# Label placeholder (no static shape given) and input placeholder for rows of
# 784 float values.
y = tf.placeholder("float")
x = tf.placeholder("float", [None, 784])
# Build a fully connected network (784 -> H1 -> H2 -> H3 -> output) with
# randomly initialized weights and return the output-layer logits.
# NOTE(review): from the second layer on, the matmul calls are given the
# parameter dicts H1/H2/H3 instead of the activations FinalH1/FinalH2/FinalH3;
# this is what raises "Failed to convert object of type <class 'dict'> to
# Tensor" in the traceback below.
def neuralNetwork(value):
H1 = {'weights': tf.Variable(tf.random_normal([784, numNodesH1])),
"biases": tf.Variable(tf.random_normal([numNodesH1]))}
H2 = {'weights': tf.Variable(tf.random_normal([numNodesH1,
numNodesH2])),
"biases": tf.Variable(tf.random_normal([numNodesH2]))}
H3 = {"weights": tf.Variable(tf.random_normal([numNodesH2,
numNodesH3])),
"biases": tf.Variable(tf.random_normal([numNodesH3]))}
output = {"weights": tf.Variable(tf.random_normal([numNodesH3,
numNodesOut])),
"biases": tf.Variable(tf.random_normal([numNodesOut]))}
FinalH1 = tf.add(tf.matmul(value, H1["weights"]), H1["biases"])
FinalH1 = tf.nn.relu(FinalH1)
FinalH2 = tf.add(tf.matmul(H1, H2["weights"]), H2["biases"])
FinalH2 = tf.nn.relu(FinalH2)
FinalH3 = tf.add(tf.matmul(H2, H3["weights"]), H3["biases"])
FinalH3 = tf.nn.relu(FinalH3)
# No activation on the output layer: the raw logits are returned.
FinalOut = tf.matmul(H3, output["weights"]) + output["biases"]
return FinalOut
def train(inputdata):
    """Train the network on MNIST and print the test accuracy.

    Runs ``epochsNum`` passes over the training set in mini-batches of
    ``sizeOfBatch``, printing the summed batch loss after every epoch, then
    evaluates accuracy on the MNIST test split.

    Args:
        inputdata: Input tensor passed to ``neuralNetwork``; feeds are
            supplied through the module-level placeholders ``x`` and ``y``.
    """
    prediction = neuralNetwork(inputdata)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizingTool = tf.train.AdamOptimizer().minimize(cost)
    epochsNum = 10
    # tf.Session must be instantiated; the bare class is not a context manager.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(epochsNum):
            lostEpochs = 0
            for o in range(int(mnist.train.num_examples / sizeOfBatch)):
                ex, ey = mnist.train.next_batch(sizeOfBatch)
                # Run the optimizer op defined above (it is named
                # optimizingTool; no `optimizer` exists in this function).
                _, c = sess.run([optimizingTool, cost], feed_dict={x: ex, y: ey})
                lostEpochs = lostEpochs + c
            print("Epochs completed = ", i, " / ", epochsNum, " epoch loss = ", lostEpochs)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        neuralAccuracy = tf.reduce_mean(tf.cast(correct, "float"))
        # eval() without an explicit session needs the default session, so
        # this stays inside the `with` block.
        print(neuralAccuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
train(x)
Every time I run this code, it gives me this error:
Traceback (most recent call last):
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\tensor_util.py", line 468, in
make_tensor_proto
str_values = [compat.as_bytes(x) for x in proto_values]
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\tensor_util.py", line 468, in
<listcomp>
str_values = [compat.as_bytes(x) for x in proto_values]
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\util\compat.py", line 65, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got {'weights': <tf.Variable
'Variable:0' shape=(784, 600) dtype=float32_ref>, 'biases': <tf.Variable
'Variable_1:0' shape=(600,) dtype=float32_ref>}
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Msi-
\AppData\Local\Programs\Python\Python36\neuralnetworktest.py", line 45, in
<module>
train(x)
File "C:\Users\Msi-
\AppData\Local\Programs\Python\Python36\neuralnetworktest.py", line 29, in
train
prediction = neuralNetwork(inputdata)
File "C:\Users\Msi-
\AppData\Local\Programs\Python\Python36\neuralnetworktest.py", line 22, in
neuralNetwork
FinalH2 = tf.add(tf.matmul(H1, H2["weights"]), H2["biases"])
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\ops\math_ops.py", line 1844, in matmul
a = ops.convert_to_tensor(a, name="a")
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\ops.py", line 836, in convert_to_tensor
as_ref=False)
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\ops.py", line 926, in
internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\constant_op.py", line 229, in
_constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\constant_op.py", line 208, in constant
value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "C:\Users\Msi-\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\tensor_util.py", line 472, in
make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'dict'> to Tensor.
Contents:
{'weights': <tf.Variable 'Variable:0' shape=(784, 600) dtype=float32_ref>,
'biases': <tf.Variable 'Variable_1:0' shape=(600,) dtype=float32_ref>}.
Consider
casting elements to a supported type.
I think you meant
FinalH1 = tf.add(tf.matmul(value, H1["weights"]), H1["biases"])
FinalH1 = tf.nn.relu(FinalH1)
FinalH2 = tf.add(tf.matmul(FinalH1, H2["weights"]), H2["biases"])
FinalH2 = tf.nn.relu(FinalH2)
FinalH3 = tf.add(tf.matmul(FinalH2, H3["weights"]), H3["biases"])
FinalH3 = tf.nn.relu(FinalH3)
FinalOut = tf.matmul(FinalH3, output["weights"]) + output["biases"]
Note the use of FinalH1 instead of H1, and likewise FinalH2 and FinalH3 instead of H2 and H3.
I am currently working on an AI for OpenAI Gym. I am trying to use randomly collected data to train a neural-network model, and then use that model to generate new data. When I try to build another model from the newly generated training data, it won't let me create the new model and raises the following error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]].
my code:
import gym
import random
import numpy as np
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import matplotlib.pyplot as plt
# Shared CartPole environment used by every function below.
env = gym.make("CartPole-v1")
env.reset()  # restarts the environment
# Number of epochs passed to model.fit() in training_model().
epoch = 5
# Learning rate passed to the regression layer in neural_network().
LR = 2e-4
# Maximum number of steps played per game.
max_score = 500
# Number of games played per data-collection pass.
number_of_training_games = 100
# Number of iterations of the loop in run_generation().
generations = 3
# Module-level score lists; nothing in the visible code appends to them.
training_scores = []
random_gen_score = []
def create_random_training_data():
    """Play random games and keep the samples from above-median games.

    Plays ``number_of_training_games`` games of at most ``max_score`` steps
    with randomly sampled actions, recording ``(previous observation,
    action)`` pairs. Samples from games whose score is strictly greater
    than the median of the finished games' scores are kept.

    Returns:
        List of ``[observation, one_hot_action]`` pairs, where the one-hot
        action is ``[1, 0]`` for action 0 and ``[0, 1]`` for action 1.
    """
    accepted_training_data = []
    scores_and_data = []
    array_of_scores = []
    for _game in range(number_of_training_games):
        score = 0
        prev_observation = []
        training_data = []
        for _ in range(max_score):
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            # The first step has no previous observation to pair with.
            if len(prev_observation) > 0:
                training_data.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                array_of_scores.append(score)
                break
        for sample in training_data:
            scores_and_data.append([score, sample[0], sample[1]])
        # reset environment
        env.reset()
    # NOTE(review): the module-level `training_scores` list is not updated
    # by this function.
    if not scores_and_data:
        return accepted_training_data
    # The median does not change while filtering, so compute it once.
    score_threshold = median(array_of_scores)
    for game_score, observation, action in scores_and_data:
        if game_score > score_threshold:
            if action == 1:
                output = [0, 1]
            elif action == 0:
                output = [1, 0]
            else:
                # Unknown action: skip rather than reuse a stale encoding.
                continue
            accepted_training_data.append([observation, output])
    return accepted_training_data
def training_model(sample_data):
    """Build a new network and fit it on ``sample_data``.

    Args:
        sample_data: Sequence of ``[observation, one_hot_action]`` pairs;
            the observations are reshaped to ``(-1, 4, 1)``.

    Returns:
        The fitted model returned by ``neural_network``.
    """
    inputs = np.array([i[0] for i in sample_data]).reshape(-1,4,1)
    correct_output = [i[1] for i in sample_data]
    # NOTE(review): every call builds another network; the traceback in this
    # file shows the second fit failing with "You must feed a value for
    # placeholder tensor 'input_1/X'". Presumably the second network lands in
    # the same default graph as the first -- confirm and isolate the graphs.
    model = neural_network(input_size = len(inputs[0]))
    model.fit({'input': inputs}, {'targets': correct_output}, n_epoch=epoch , snapshot_step=500, show_metric=True, run_id='openai_learning')
    # NOTE(review): this prints the builtin `input` function, not `inputs`;
    # looks like a leftover debug statement -- confirm before changing.
    print(input)
    return model
def neural_network(input_size):
    """Build the tflearn classifier that maps observations to two actions.

    The network has five ReLU hidden layers (128, 256, 512, 256, 128
    units), each followed by dropout, and a 2-unit softmax output trained
    with Adam on categorical cross-entropy.

    Args:
        input_size: Length of one observation; the input layer has shape
            ``[None, input_size, 1]``.

    Returns:
        A ``tflearn.DNN`` model that logs to the ``log`` directory.
    """
    # this is where our observation data will go
    network = input_data(shape=[None, input_size, 1], name = 'input')
    # Hidden layers, created in the same order as the layer widths listed.
    for width in (128, 256, 512, 256, 128):
        network = fully_connected(network, width, activation = 'relu')
        # dropout is used to drop random nodes in order to reduce overtraining
        network = dropout(network, 0.8)
    # this is the output
    network = fully_connected(network, 2, activation = 'softmax')
    network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network, tensorboard_dir='log')
    return model
def run_generation():
    """Train a model on random play, then retrain on model-generated data.

    The first iteration collects random games and trains the first model.
    Every later iteration generates games with that first model and trains
    a new model on them.

    Returns:
        The most recently trained model, or ``None`` when ``generations``
        is 0.
    """
    random_sample_data = []
    model1 = None
    latest_model = None
    for _generation in range(generations):
        if len(random_sample_data) == 0:
            random_sample_data = create_random_training_data()
            model1 = training_model(random_sample_data)
            latest_model = model1
        else:
            # NOTE(review): data is always generated with the first model,
            # not with the latest one -- confirm this is intended.
            trained_data = one_generation(model1)
            latest_model = training_model(trained_data)
    # Return whichever model was trained last, so a single generation no
    # longer fails on an unassigned second model.
    return latest_model
def one_generation(model):
    """Play games with ``model`` and keep the samples from above-median games.

    The first action of every game is sampled at random; later actions are
    the argmax of ``model.predict`` on the previous observation. Samples
    from games whose score is strictly greater than the median of the
    finished games' scores are kept.

    Args:
        model: Object with a ``predict`` method accepting an array of shape
            ``(-1, len(observation), 1)``.

    Returns:
        List of ``[observation, one_hot_action]`` pairs, where the one-hot
        action is ``[1, 0]`` for action 0 and ``[0, 1]`` for action 1.
    """
    accepted_training_data = []
    scores_and_data = []
    array_of_scores = []
    for _game in range(number_of_training_games):
        score = 0
        prev_observation = []
        training_data = []
        for _ in range(max_score):
            if len(prev_observation) == 0:
                action = env.action_space.sample()
            else:
                action = np.argmax(model.predict(prev_observation.reshape(-1,len(prev_observation),1))[0])
            observation, reward, done, info = env.step(action)
            # The first step has no previous observation to pair with.
            if len(prev_observation) > 0:
                training_data.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                array_of_scores.append(score)
                break
        for sample in training_data:
            scores_and_data.append([score, sample[0], sample[1]])
        # reset environment
        env.reset()
    if not scores_and_data:
        return accepted_training_data
    # The median does not change while filtering, so compute it once.
    score_threshold = median(array_of_scores)
    for game_score, observation, action in scores_and_data:
        if game_score > score_threshold:
            if action == 1:
                output = [0, 1]
            elif action == 0:
                output = [1, 0]
            else:
                # Unknown action: skip rather than reuse a stale encoding.
                continue
            accepted_training_data.append([observation, output])
    return accepted_training_data
def testing():
    """Train via ``run_generation`` and play 100 rendered evaluation games.

    The first move of each game is random; later moves are the argmax of
    the model's prediction on the previous observation. Prints the average
    score and the list of per-game scores.
    """
    scores = []
    model = run_generation()
    for _game in range(100):
        score = 0
        prev_obs = []
        env.reset()
        for _step in range(max_score):
            env.render()
            # first move is going to be random
            if len(prev_obs)==0:
                action = random.randrange(0,2)
            else:
                action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])
            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            score+=reward
            if done:
                break
        scores.append(score)
    #print('Average training Score:',sum(training_scores)/len(training_scores))
    print('Average Score:',sum(scores)/len(scores))
    print (scores)
# Script entry point: evolve a model via run_generation() and print its scores.
testing()
error:
Traceback (most recent call last):
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/anaconda/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 259, in <module>
testing()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 219, in testing
model = run_generation()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 148, in run_generation
model2 = training_model(trained_data)
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 92, in training_model
model.fit({'input': inputs}, {'targets': correct_output}, n_epoch=epoch , snapshot_step=500, show_metric=True, run_id='openai_learning')
File "/anaconda/lib/python3.6/site-packages/tflearn/models/dnn.py", line 215, in fit
callbacks=callbacks)
File "/anaconda/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 336, in fit
show_metric)
File "/anaconda/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 777, in _train
feed_batch)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'input_1/X', defined at:
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 259, in <module>
testing()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 219, in testing
model = run_generation()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 148, in run_generation
model2 = training_model(trained_data)
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 90, in training_model
model = neural_network(input_size = len(inputs[0]))
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 100, in neural_network
network = input_data(shape=[None, input_size, 1], name = 'input')
File "/anaconda/lib/python3.6/site-packages/tflearn/layers/core.py", line 81, in input_data
placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1507, in placeholder
name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1997, in _placeholder
name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
this line:
network = input_data(shape=[None, input_size, 1], name = 'input')
should be
network = input_data(shape=[None, input_size,input_size , 1], name = 'input')
The shape should have four entries; the first one (None) is taken as the batch-size placeholder.
Try this.
I'm learning TensorFlow. I was trying tf.train.MomentumOptimizer but I got the following error:
Traceback (most recent call last):
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 70, in run
self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:#Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
Caused by op u'Momentum/update_Variable_2/ApplyMomentum', defined at:
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 55, in run
train_step = self.optimizer.minimize(self.cross_entropy)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 289, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 413, in apply_gradients
update_ops.append(processor.update_op(self, grad))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 61, in update_op
return optimizer._apply_dense(g, self._v) # pylint: disable=protected-access
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/momentum.py", line 69, in _apply_dense
use_nesterov=self._use_nesterov).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 348, in apply_momentum
use_nesterov=use_nesterov, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:#Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
And following is my code:
import time
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
class ReluMnistNet:
    """Small fully connected ReLU classifier for MNIST on the TF graph API.

    Intended call order: construct, optionally assign a different
    ``optimizer``, then ``prepare()``, ``run()`` and ``close()``.  The
    training op is created inside ``prepare()`` *before* the variable
    initialiser, so optimizer-owned variables (e.g. the ``Momentum`` slots
    of ``tf.train.MomentumOptimizer``) are initialised as well.
    """

    def __init__(self, optimizer=None):
        """Load MNIST and build the forward graph, loss and accuracy ops.

        Args:
            optimizer: a ``tf.train`` optimizer instance.  Defaults to
                ``tf.train.GradientDescentOptimizer(0.01)``.
        """
        self.varlist = []
        self.optimizer = optimizer or tf.train.GradientDescentOptimizer(0.01)
        self.sess = None
        # Training op and the optimizer it was built from (see prepare()).
        self._train_step = None
        self._train_optimizer = None
        # fetch dataset
        self.mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
        # prepare environment
        layers = [100]
        input_layer = 784
        output_layer = 10
        self.x = tf.placeholder(tf.float32, [None, input_layer])
        last_layer = input_layer
        y = self.x
        for layer in layers:
            b = tf.Variable(tf.zeros([layer]))
            self.varlist.append(b)
            W = tf.Variable(tf.random_normal([last_layer, layer], stddev=0.01))
            self.varlist.append(W)
            # NOTE(review): the bias is added after the ReLU, not inside it;
            # kept as in the original - confirm this is intended.
            y = tf.nn.relu(tf.matmul(y, W)) + b
            last_layer = layer
        b = tf.Variable(tf.zeros([output_layer]))
        self.varlist.append(b)
        W = tf.Variable(tf.random_normal([last_layer, output_layer], stddev=0.01))
        self.varlist.append(W)
        self.y = tf.matmul(y, W) + b
        self.y_ = tf.placeholder(tf.float32, [None, output_layer])
        self.cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_))
        # Built once here so run() does not add new ops to the graph on
        # every epoch.
        correct_prediction = tf.equal(tf.argmax(self.y, 1), tf.argmax(self.y_, 1))
        self._accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def prepare(self):
        """Create the training op, open a session and initialise variables.

        Any session left open by an earlier ``prepare()`` is closed first.
        """
        self.close()
        # minimize() creates the optimizer's own variables, so it has to run
        # before the initialiser op is built; otherwise those variables are
        # never initialised.  Rebuild only when the optimizer was replaced.
        if self._train_step is None or self._train_optimizer is not self.optimizer:
            self._train_step = self.optimizer.minimize(self.cross_entropy)
            self._train_optimizer = self.optimizer
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def run(self, batch_size=100, stop=0.001, print_epoch=False, max_epochs=10000):
        """Train until test accuracy changes by less than ``stop``.

        Args:
            batch_size: number of random training samples per step.
            stop: training ends once the absolute change in test accuracy
                between two consecutive epochs drops below this value.
            print_epoch: when true, print the epoch index and accuracy.
            max_epochs: upper bound on the number of epochs.

        Returns:
            ``(accuracy_history, elapsed_seconds)``.

        Raises:
            RuntimeError: if ``prepare()`` has not been called, or if
                ``optimizer`` was replaced after the last ``prepare()``.
        """
        if self.sess is None:
            raise RuntimeError("call prepare() before run()")
        if self._train_optimizer is not self.optimizer:
            raise RuntimeError(
                "optimizer changed since prepare(); call prepare() again so "
                "its variables get initialised")

        mnist = self.mnist
        data_size = mnist.train.images.shape[0]
        # Floor division keeps this an int on Python 3 as well.
        steps_per_epoch = data_size // batch_size
        test_feed = {self.x: mnist.test.images, self.y_: mnist.test.labels}
        last_accuracy = 0
        accuracy_history = []
        time1 = time.time()
        for i in range(max_epochs):
            for _ in range(steps_per_epoch):
                # random batch
                batch_idx = np.random.permutation(data_size)[:batch_size]
                batch_xs = mnist.train.images[batch_idx]
                batch_ys = mnist.train.labels[batch_idx]
                self.sess.run(self._train_step,
                              feed_dict={self.x: batch_xs, self.y_: batch_ys})
            # test the accuracy
            accuracy = self.sess.run(self._accuracy, feed_dict=test_feed)
            accuracy_history.append(accuracy)
            if print_epoch:
                # %-formatting prints the same text on Python 2 and 3.
                print("%d %s" % (i, accuracy))
            if last_accuracy != 0 and abs(last_accuracy - accuracy) < stop:
                break
            last_accuracy = accuracy
        time2 = time.time()
        return accuracy_history, (time2 - time1)

    def close(self):
        """Close the session if one is open; safe to call repeatedly."""
        sess = getattr(self, "sess", None)
        if sess is not None:
            sess.close()
        self.sess = None
if __name__ == '__main__':
    # Train the same network ten times with a momentum optimizer; each round
    # re-initialises the variables, trains, then releases the session.
    momentum_opt = tf.train.MomentumOptimizer(0.01, momentum=0.9)
    learner = ReluMnistNet(optimizer=momentum_opt)
    for _round in range(10):
        learner.prepare()
        learner.run(stop=0.01, print_epoch=True)
        learner.close()
It seems that a variable named Momentum is uninitialized. However, by calling learner.prepare(), I have already called tf.initialize_all_variables(). Moreover, I have no variable named Momentum. Why does this happen?
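In your code you are calling minimize after initializing the global variables, so the variables the optimizer creates (such as Variable_2/Momentum) are never initialized.
Instead, create the training op before initialization: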
self.cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_) )
self.optimize = self.optimizer.minimize(self.cross_entropy)
and in run function instead of
train_step = self.optimizer.minimize(self.cross_entropy)
you should call
train_step = self.optimize
P.S.
Momentum is the default name for the MomentumOptimizer.
I'm using the code pasted below. The 'forward' part of the code seems to work, by virtue of the "assert root_emb == 1 + emb[0] * emb[1]" passing. However, once a training step is taken (the line following the assert), a strange error appears, suggesting an issue with the TensorArray written to during the while loop.
tensorflow.python.framework.errors.InvalidArgumentError: TensorArray
TensorArray#gradients: Could not read from TensorArray index 2 because
it has not yet been written to. [[Node:
gradients/while/TensorArrayWrite_grad/TensorArrayRead =
TensorArrayRead[_class=["loc:#TensorArray"], dtype=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/cpu:0"](gradients/while/TensorArrayWrite_grad/TensorArrayGrad/TensorArrayGrad,
gradients/while/TensorArrayWrite_grad/TensorArrayRead/StackPop,
gradients/while/TensorArrayWrite_grad/TensorArrayGrad/gradient_flow)]]
Caused by op u'gradients/while/TensorArrayWrite_grad/TensorArrayRead',
defined at: File "minimal.py", line 82, in
model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 61, in init
self.grad = tf.gradients(self.loss, self.params) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gradients.py",
line 481, in gradients
in_grads = _AsList(grad_fn(op, *out_grads)) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_grad.py",
line 115, in _TensorArrayWriteGrad
grad = g.read(index) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py",
line 177, in read
dtype=self._dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py",
line 781, in _tensor_array_read
flow_in=flow_in, dtype=dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py",
line 694, in apply_op
op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py",
line 2154, in create_op
original_op=self._default_original_op, op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py",
line 1154, in init
self._traceback = _extract_stack()
...which was originally created as op u'while/TensorArrayWrite',
defined at: File "minimal.py", line 82, in
model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 50, in init
loop_vars=(self.time, node_emb, tf.zeros([1]))) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1681, in While
back_prop=back_prop, swap_memory=swap_memory, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1671, in while_loop
result = context.BuildLoop(cond, body, loop_vars) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py",
line 1572, in BuildLoop
body_result = body(*vars_for_body_with_tensor_arrays) File "minimal.py", line 43, in _recurrence
new_node_emb = node_emb.write(children_and_parent[-1], parent_emb) File
"/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py",
line 200, in write
name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py",
line 875, in _tensor_array_write
value=value, flow_in=flow_in, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py",
line 694, in apply_op
op_def=op_def)
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import tensor_array_ops, control_flow_ops
class TreeRNN(object):
    """Tree-structured recursive network evaluated with a TF while loop.

    Leaves are word embeddings.  Each row of ``tree`` lists ``degree``
    child node indices followed by the index of the parent they produce;
    rows are processed in order and the embedding of the last parent
    computed is used for the prediction.
    """

    def __init__(self, num_emb, emb_dim, output_dim, degree=2, learning_rate=0.01):
        """Build the graph: placeholders, tree traversal, loss and update op.

        Args:
            num_emb: number of rows in the embedding matrix.
            emb_dim: size of each embedding vector.
            output_dim: size of the prediction and of the target ``y``.
            degree: number of children per internal node.
            learning_rate: step size for gradient descent.
        """
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.output_dim = output_dim
        self.degree = degree
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim]))
        self.recursive_unit = self.create_recursive_unit()
        self.W_out = tf.Variable(self.init_matrix([self.output_dim, self.emb_dim]))
        self.b_out = tf.Variable(self.init_vector([self.output_dim]))
        self.x = tf.placeholder(tf.int32, shape=[None])  # word indices
        self.tree = tf.placeholder(tf.int32, shape=[None, self.degree + 1])
        self.y = tf.placeholder(tf.float32, shape=[self.output_dim])

        num_words, = tf.unpack(tf.shape(self.x), 1)  # also num leaves
        emb_x = tf.gather(self.embeddings, self.x)
        # NOTE(review): the initial size is num_words - 1 although num_words
        # rows are unpacked into the array; dynamic_size=True lets it grow.
        # Kept as in the original - confirm the initial size is intended.
        node_emb = tensor_array_ops.TensorArray(
            dtype=tf.float32, size=num_words - 1, dynamic_size=True,
            clear_after_read=False)
        node_emb = node_emb.unpack(emb_x)

        num_nodes, _ = tf.unpack(tf.shape(self.tree), 2)  # num internal nodes
        tree_traversal = tensor_array_ops.TensorArray(
            dtype=tf.int32, size=num_nodes)
        tree_traversal = tree_traversal.unpack(self.tree)

        def _recurrence(t, node_emb, _):
            """Combine the children of tree row ``t`` and store the parent."""
            node_info = tree_traversal.read(t)
            children_and_parent = tf.unpack(node_info, self.degree + 1)
            child_emb = [node_emb.read(children_and_parent[i])
                         for i in range(self.degree)]
            parent_emb = self.recursive_unit(child_emb)
            new_node_emb = node_emb.write(children_and_parent[-1], parent_emb)
            return t + 1, new_node_emb, parent_emb

        self.time = tf.constant(0, dtype=tf.int32, name='time')
        # The third loop variable carries the latest parent embedding, so its
        # initial value must have the embedding's shape, not a fixed [1].
        # NOTE(review): parallel_iterations=1 is the reported workaround for
        # the "TensorArray index ... has not yet been written to" gradient
        # error; every iteration depends on the previous one through
        # node_emb anyway. Confirm against the TF version in use.
        _, _, final_emb = control_flow_ops.While(
            cond=lambda t, _1, _2: t < num_nodes,
            body=_recurrence,
            loop_vars=(self.time, node_emb, tf.zeros([self.emb_dim])),
            parallel_iterations=1)
        self.final_state = final_emb

        self.pred_y = self.activation(
            tf.matmul(self.W_out, tf.reshape(self.final_state, [self.emb_dim, 1]))
            + self.b_out)
        self.loss = self.loss_fn(self.y, self.pred_y)

        self.params = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.grad = tf.gradients(self.loss, self.params)
        self.updates = opt.apply_gradients(zip(self.grad, self.params))

    def init_matrix(self, shape):
        """Return a random-normal initial value (stddev 0.1) of ``shape``."""
        return tf.random_normal(shape, stddev=0.1)

    def init_vector(self, shape):
        """Return an all-zero initial value of ``shape``."""
        return tf.zeros(shape)

    def create_recursive_unit(self):
        """Return the function that combines child embeddings into a parent."""
        def unit(child_emb):  # very simple
            # Only the first two children are used.
            return 1 + child_emb[0] * child_emb[1]
        return unit

    def activation(self, inp):
        """Apply the output non-linearity (sigmoid)."""
        return tf.sigmoid(inp)

    def loss_fn(self, y, pred_y):
        """Return the sum of squared differences between ``y`` and ``pred_y``."""
        return tf.reduce_sum(tf.square(y - pred_y))
# Smoke test: two leaves (word ids 0 and 1) combined into parent node 2.
model = TreeRNN(8, 1, 1, degree=2)
sess = tf.Session()
sess.run(tf.initialize_all_variables())

tree_feed = {model.x: np.array([0, 1]), model.tree: np.array([[0, 1, 2]])}
root_emb = sess.run([model.final_state], feed_dict=tree_feed)
emb, = sess.run([model.embeddings])
# Forward check against the recursive unit's formula. root_emb is a
# one-element list, and the values are floats, so compare its entry with a
# tolerance instead of relying on `==` between a list and an array.
assert np.allclose(root_emb[0], 1 + emb[0] * emb[1])

# One training step on the same tree with target 0.
train_feed = dict(tree_feed)
train_feed[model.y] = np.array([0])
out = sess.run([model.updates, model.loss], feed_dict=train_feed)
Set parallel_iterations=1 in the tf.while_loop call.