Related
I have been getting this error and i cant figure out the reason. if anyone could help would be great.
this is my code:
import numpy as np
import pickle
import os
import download
#from dataset import one_hot_encoded
#from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from random import shuffle
data_path = "D:/Personal details/Internship/"
# Width and height of each image.
img_size = 32
# Number of channels in each image, 3 channels: Red, Green, Blue.
num_channels = 3
# Length of an image when flattened to a 1-dim array.
img_size_flat = img_size * img_size * num_channels
# Number of classes.
num_classes = 10
# Number of files for the training-set.
_num_files_train = 5
# Number of images for each batch-file in the training-set.
_images_per_file = 10000
def _get_file_path(filename=""):
return os.path.join(data_path, "cifar-10-batches-py/", filename)
def _unpickle(filename):
file_path = _get_file_path(filename)
print("Loading data: " + file_path)
with open(file_path, mode='rb') as file:
# In Python 3.X it is important to set the encoding,
# otherwise an exception is raised here.
data = pickle.load(file, encoding='bytes')
return data
def _convert_images(raw):
# Convert the raw images from the data-files to floating-points.
raw_float = np.array(raw, dtype=float) / 255.0
# Reshape the array to 4-dimensions.
images = raw_float.reshape([-1, num_channels, img_size, img_size])
# Reorder the indices of the array.
images = images.transpose([0, 2, 3, 1])
return images
def _load_data(filename):
# Load the pickled data-file.
data = _unpickle(filename)
# Get the raw images.
raw_images = data[b'data']
# Get the class-numbers for each image. Convert to numpy-array.
cls = np.array(data[b'labels'])
# Convert the images.
images = _convert_images(raw_images)
return images, cls
def load_class_names():
# Load the class-names from the pickled file.
raw = _unpickle(filename="batches.meta")[b'label_names']
# Convert from binary strings.
names = [x.decode('utf-8') for x in raw]
return names
def load_training_data():
images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float)
cls = np.zeros(shape=[_num_images_train], dtype=int)
# Begin-index for the current batch.
begin = 0
# For each data-file.
for i in range(_num_files_train):
# Load the images and class-numbers from the data-file.
images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1))
# Number of images in this batch.
num_images = len(images_batch)
# End-index for the current batch.
end = begin + num_images
# Store the images into the array.
images[begin:end, :] = images_batch
# Store the class-numbers into the array.
cls[begin:end] = cls_batch
# The begin-index for the next batch is the current end-index.
begin = end
return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
def load_test_data():
images, cls = _load_data(filename="test_batch")
return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
########################################################################
def one_hot_encoded(class_numbers, num_classes=None):
if num_classes is None:
num_classes = np.max(class_numbers) + 1
return np.eye(num_classes, dtype=float)[class_numbers]
class_names = load_class_names()
images_train, cls_train, labels_train = load_training_data()
images_test, cls_test, labels_test = load_test_data()
images_train_train = images_train[0:45000]
validation_train = images_train[45000:50000]
labels_train_train = labels_train[0:45000]
validation_labels = labels_train[45000:]
print(len(images_train_train))
print(len(validation_train))
##print(class_names)
##print(len(images_train))
##print(cls_train)
##print(labels_train)
##print(cls_test)
##print(labels_test)
n_classes = len(class_names)
batch_size = 128
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3], name='x')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='y_true')
def conv2d(x,W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def convolutional_neural_network(x):
weights = {'W_conv1': tf.Variable(tf.random_normal([3,3,3,64])),
'W_conv2': tf.Variable(tf.random_normal([3,3,64,128])),
'W_conv3': tf.Variable(tf.random_normal([3,3,128,256])),
'W_conv4': tf.Variable(tf.random_normal([3,3,256,256])),
'W_fc1': tf.Variable(tf.random_normal([256,1024])),
'W_fc2': tf.Variable(tf.random_normal([1024,1024])),
'soft_max': tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'b_conv1': tf.Variable(tf.random_normal([64])),
'b_conv2': tf.Variable(tf.random_normal([128])),
'b_conv3': tf.Variable(tf.random_normal([256])),
'b_conv4': tf.Variable(tf.random_normal([256])),
'b_fc1': tf.Variable(tf.random_normal([1024])),
'b_fc2': tf.Variable(tf.random_normal([1024])),
'soft_max': tf.Variable(tf.random_normal([n_classes]))}
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool2d(conv1)
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['b_conv3'])
conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['b_conv4'])
conv4 = maxpool2d(conv4)
fc1 = tf.reshape(conv4,[256,-1])
fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2'] + biases['b_fc2']))
soft_max = tf.matmul(fc2, weights['soft_max']) + biases['soft_max']
return soft_max
def train_neural_network(x):
prediction = convolutional_neural_network(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits = prediction,labels = y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 3
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for epoch in range(hm_epochs):
epoch_loss = 0
i = 0
while i < len(images_train_train):
start = i
end = i+batch_size
batch_x = np.array(images_train_train[start:end])
batch_y = np.array(labels_train_train[start:end])
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:validation_train, y:validation_labels}))
train_neural_network(x)
Ans this is the error i have been getting.
WARNING:tensorflow:From D:/Personal details/Internship/cifar-10v1.0.py:310: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.
See #{tf.nn.softmax_cross_entropy_with_logits_v2}.
WARNING:tensorflow:From C:\Python35\lib\site-packages\tensorflow\python\util\tf_should_use.py:118: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
Traceback (most recent call last):
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/Personal details/Internship/cifar-10v1.0.py", line 344, in <module>
train_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 327, in train_neural_network
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
run_metadata_ptr)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
Caused by op 'MatMul', defined at:
File "<string>", line 1, in <module>
File "C:\Python35\lib\idlelib\run.py", line 130, in main
ret = method(*args, **kwargs)
File "C:\Python35\lib\idlelib\run.py", line 357, in runcode
exec(code, self.locals)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 344, in <module>
train_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 309, in train_neural_network
prediction = convolutional_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 300, in convolutional_neural_network
fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
File "C:\Python35\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2122, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 4567, in mat_mul
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
It looks like the problem is in convolutional_neural_network layer() function wherein somehow it is mad at not being able to multiply the same dimension of the matrix. But it is not clear how to solve the issue
Thank you for the help in advance...
After reshaping conv4 at line fc1 = tf.reshape(conv4,[256,-1]), the shape of fc1 is (256, 2048) and the weight matrix W_fc1 has shape (256, 1024). Thus, you get a size incompatible error at the next line fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
in the matrix multiplication part. I suggest you to go through the dimensions at every step manually to find errors in future.
Tensorflow 1.7 when using dynamic_rnn.It runs fine at first , but at the 32th(it changes when i run the code) step , the error appears. When i used smaller batch , it seems the code can run longer , however the error still poped up .Just cannt figure out what's wrong.
from mapping import *
def my_input_fn(features, targets, batch_size=20, shuffle=True, num_epochs=None, sequece_lenth=None):
ds = tf.data.Dataset.from_tensor_slices(
(features, targets, sequece_lenth)) # warning: 2GB limit
ds = ds.batch(batch_size).repeat(num_epochs)
if shuffle:
ds = ds.shuffle(10000)
features, labels, sequence = ds.make_one_shot_iterator().get_next()
return features, labels, sequence
def lstm_cell(lstm_size=50):
return tf.contrib.rnn.BasicLSTMCell(lstm_size)
class RnnModel:
def __init__(self,
batch_size,
hidden_units,
time_steps,
num_features
):
self.batch_size = batch_size
self.hidden_units = hidden_units
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
[lstm_cell(i) for i in self.hidden_units])
self.initial_state = stacked_lstm.zero_state(batch_size, tf.float32)
self.model = stacked_lstm
self.state = self.initial_state
self.time_steps = time_steps
self.num_features = num_features
def loss_mean_squre(self, outputs, targets):
pos = tf.add(outputs, tf.ones(self.batch_size))
eve = tf.div(pos, 2)
error = tf.subtract(eve,
targets)
return tf.reduce_mean(tf.square(error))
def train(self,
num_steps,
learningRate,
input_fn,
inputs,
targets,
sequenceLenth):
periods = 10
step_per_periods = int(num_steps / periods)
input, target, sequence = input_fn(inputs, targets, self.batch_size, shuffle=True, sequece_lenth=sequenceLenth)
initial_state = self.model.zero_state(self.batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)
loss = self.loss_mean_squre(tf.reshape(outputs, [self.time_steps, self.batch_size])[-1], target)
optimizer = tf.train.AdamOptimizer(learning_rate=learningRate)
grads_and_vars = optimizer.compute_gradients(loss, self.model.variables)
optimizer.apply_gradients(grads_and_vars)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
for i in range(num_steps):
sess.run(init_op)
state2, current_loss= sess.run([state, loss])
if i % step_per_periods == 0:
print("period " + str(int(i / step_per_periods)) + ":" + str(current_loss))
return self.model, self.state
def processFeature(df):
df = df.drop('class', 1)
features = []
for i in range(len(df["vecs"])):
features.append(df["vecs"][i])
aa = pd.Series(features).tolist() # tramsform into list
featuresList = []
for i in features:
p1 = []
for k in i:
p1.append(list(k))
featuresList.append(p1)
return featuresList
def processTargets(df):
selected_features = df[
"class"]
processed_features = selected_features.copy()
return tf.convert_to_tensor(processed_features.astype(float).tolist())
if __name__ == '__main__':
dividNumber = 30
"""
some code here to modify my data to input
it looks like this:
inputs before use input function : [fullLenth, charactorLenth, embeddinglenth]
"""
model = RnnModel(15, [100, 80, 80, 1], time_steps=dividNumber, num_features=25)
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
And error is under here
Traceback (most recent call last):
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1330, in _do_call
return fn(*args)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1315, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1423, in _call_tf_sessionrun
status, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 516, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 124, in train
state2, current_loss, nowAccuracy = sess.run([state, loss, accuracy])
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 908, in run
run_metadata_ptr)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1143, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1324, in _do_run
run_metadata)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\client\session.py", line 1343, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
Caused by op 'rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat', defined at:
File "D:/programming/mlwords/dnn_gragh.py", line 198, in <module>
model.train(5000, 0.0001, my_input_fn, training_examples, training_targets, sequenceLenth=trainSequenceL)
File "D:/programming/mlwords/dnn_gragh.py", line 95, in train
outputs, state = tf.nn.dynamic_rnn(self.model, input, initial_state=initial_state)#,sequence_length=sequence
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 627, in dynamic_rnn
dtype=dtype)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 824, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3205, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2943, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2880, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3181, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 795, in _time_step
(output, new_state) = call_cell()
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn.py", line 781, in <lambda>
call_cell = lambda: cell(input_t, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1283, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\layers\base.py", line 714, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 620, in call
array_ops.concat([inputs, h], 1), self._kernel)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1181, in concat
return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1101, in concat_v2
"ConcatV2", values=values, axis=axis, name=name)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 3309, in create_op
op_def=op_def)
File "D:\Anaconda3\envs\tensorflow-cpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1669, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [20,25] vs. shape[1] = [30,100]
[[Node: rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3, rnn/while/Switch_4:1, rnn/while/rnn/multi_rnn_cell/cell_3/basic_lstm_cell/Const)]]
this is my code used to check my input
def checkData(inputs, targets, sequencelence):
batch_size = 20
features, target, sequece = my_input_fn(inputs, targets, batch_size=batch_size, shuffle=True, num_epochs=None,
sequece_lenth=sequencelence)
with tf.Session() as sess:
for i in range(1000):
features1, target1, sequece1 = sess.run([features, target, sequece])
assert len(features1) == batch_size
for sentence in features1 :
assert len(sentence) == 30
for word in sentence:
assert len(word) == 25
assert len(target1) == batch_size
assert len(sequece1) == batch_size
print(target1)
print("OK")
The error is coming from LSTMCell.call call method. There we are trying to tf.concat([inputs, h], 1) meaning that we want to concatenate the next input with the current hidden state before matmul'ing with the kernel variables matrix. The error is saying that you can't do it because the batch (0th) dimensions don't match up - your input is shaped [20,25] and your hidden state is shaped [30,100].
For some reason on your 32nd iteration, or whenever you see the error, the input is not batched to 30, but only to 20. This usually happens at the end of your training data when the total number of training examples does not evenly divide your batch size. This hypothesis is also consistent with "When i used smaller batch , it seems the code can run longer" statement.
I had the same issue. When I corrected the image input size to match the input shape, it ran without errors.
I followed this example on using tflearn trainer and coded this:
image_paths, labels = dataset_utils.read_dataset_list('../test/dummy_labels_file.txt')
data_dir = "../test/dummy_data/"
images = dataset_utils.read_images(data_dir=data_dir, image_paths=image_paths, image_extension='png')
print('Done reading images')
images = dataset_utils.resize(images, (1596, 48))
images = dataset_utils.transpose(images)
labels = dataset_utils.encode(labels)
x_train, x_test, y_train, y_test = dataset_utils.split(features=images, test_size=0.5, labels=labels)
... # parameters initiailized here
with tf.Graph().as_default():
X = tf.placeholder(tf.float32, [None, None, num_features])
Y = tf.placeholder(tf.int32)
sparse_Y = network_utils.dense_to_sparse(Y, num_classes)
seq_lens = tf.placeholder(tf.int32, [None])
def dnn(x):
layer = network_utils.bidirectional_grid_lstm(inputs=x, num_hidden=num_hidden_units)
layer = network_utils.get_time_major(inputs=layer, batch_size=network_utils.get_shape(x)[0],
num_classes=num_classes, num_hidden_units=num_hidden_units * 2)
return layer
net = dnn(X)
cost = network_utils.cost(network_utils.ctc_loss(inputs=net, labels=sparse_Y, sequence_length=seq_lens))
optimizer = network_utils.get_optimizer(learning_rate=learning_rate, optimizer_name=optimizer_name)
train_op = tflearn.TrainOp(loss=cost, optimizer=optimizer)
trainer = tflearn.Trainer(train_ops=train_op)
trainer.fit(feed_dicts={X: x_train, Y: y_train, seq_lens: dataset_utils.get_seq_lens(x_train)},
val_feed_dicts={X: x_test, Y: y_test, seq_lens: dataset_utils.get_seq_lens(x_test)},
n_epoch=1) #error happens here
The training starts when I run it but I encounter this error:
Traceback (most recent call last):
File ".../Optimized_OCR/main/train_using_tflearn_trainer.py", line 53, in <module>
tf.app.run(main=main)
File "...\tensorflow\python\platform\app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File ".../Optimized_OCR/main/train_using_tflearn_trainer.py", line 49, in main
n_epoch=1)
File "...\tflearn\helpers\trainer.py", line 338, in fit
show_metric)
File "...\tflearn\helpers\trainer.py", line 817, in _train
feed_batch)
File "...\tensorflow\python\client\session.py", line 889, in run
run_metadata_ptr)
File "...\tensorflow\python\client\session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "...\tensorflow\python\client\session.py", line 1317, in _do_run
options, run_metadata)
File "...\tensorflow\python\client\session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[0] does not have value
Has anyone else encountered this? How do fix this? I do want to use tflearn trainer to have an easier time training and testing my ocr models and I think this is the only thing I need to fix to be able to use it.
This is my code in tensorflow to train a GAN. I am training des to able to distinguish between fake and original video. I have important not relevant part of code to avoid stack over flow mostly code error
X = tf.placeholder(tf.float32, shape=[None, 28, 28])
D_W1 = tf.Variable(xavier_init([1024, 128]))
D_b1 = tf.Variable(tf.zeros(shape=[128]))
D_W2 = tf.Variable(xavier_init([128, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))
theta_D = [D_W1, D_W2, D_b1, D_b2]
rnn_size = 1024
rnn_layer = 2
Z = tf.placeholder(tf.float32, shape=[None, 100])
G_W1 = tf.Variable(xavier_init([100, 128]))
G_b1 = tf.Variable(tf.zeros(shape=[128]))
G_W2 = tf.Variable(xavier_init([128, 784]))
G_b2 = tf.Variable(tf.zeros(shape=[784]))
theta_G = [G_W1, G_W2, G_b1, G_b2]
def sample_Z(m, n):
return np.random.uniform(-1., 1., size=[m, n])
def generator(z):
G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
G_prob = tf.nn.sigmoid(G_log_prob)
G_prob = tf.reshape(G_prob, [-1,28, 28])
return G_prob
def discriminator(x):
x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
# with tf.variable_scope('cell_def'):
stacked_rnn1 = []
for iiLyr1 in range(rnn_layer):
stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True))
lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
# with tf.variable_scope('rnn_def'):
dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
lstm_multi_fw_cell, x, dtype=tf.float32)
D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
D_logit = tf.matmul(D_h1, D_W2) + D_b2
D_prob = tf.nn.sigmoid(D_logit)
return D_prob, D_logit
G_sample = generator(Z)
print(G_sample.get_shape())
print(X.get_shape())
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)
D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
G_loss = -tf.reduce_mean(tf.log(D_fake))
summary_d = tf.summary.histogram('D_loss histogram', D_loss)
summary_g = tf.summary.histogram('D_loss histogram', G_loss)
summary_s = tf.summary.scalar('D_loss scalar', D_loss)
summary_s1 = tf.summary.scalar('scalar scalar', G_loss)
# Add image summary
summary_op = tf.summary.image("plot", image)
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
mb_size = 128
Z_dim = 100
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
# merged_summary_op = tf.summary.merge_all()
sess = tf.Session()
saver = tf.train.Saver()
writer1 = tf.summary.FileWriter('log/log-sample1', sess.graph)
writer2 = tf.summary.FileWriter('log/log-sample2', sess.graph)
sess.run(tf.global_variables_initializer())
if not os.path.exists('out/'):
os.makedirs('out/')
i = 0
with tf.variable_scope("myrnn") as scope:
for it in range(5000):
X_mb, _ = mnist.train.next_batch(mb_size)
X_mb = tf.reshape(X_mb, [mb_size, -1, 28])
_, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
_, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})
summary_str, eded = sess.run([summary_d, summary_s], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
writer1.add_summary(summary_str, it)
writer1.add_summary(eded, it)
summary_str1, eded1 = sess.run([summary_g, summary_s1], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
writer2.add_summary(summary_str1, it)
writer2.add_summary(eded1, it)
if it % 1000 == 0:
print('Iter: {}'.format(it))
print('D loss: {:.4}'. format(D_loss_curr))
print('G_loss: {:.4}'.format(G_loss_curr))
print()
save_path = saver.save(sess, "tmp/model.ckpt")
writer1.close()
writer2.close()
`
Following is the error when I run this code please help.
Traceback (most recent call last):
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 104, in <module>
D_fake, D_logit_fake = discriminator(G_sample)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1212, in static_rnn
(output, state) = call_cell()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 664, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 103, in <module>
D_real, D_logit_real = discriminator(X)
It is GAN. I am using MNIST data to train generator and discriminator.
Add a reuse parameter to the BasicLSTMCell. Since you are calling the discriminator function twice and calling reuse=None, both the times, it throws the errors when try to create variables with same name. In this context you need to reuse the variables from the graph for the second call; as you don't need to create new set of variables.
def discriminator(x, reuse):
x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
# with tf.variable_scope('cell_def'):
stacked_rnn1 = []
for iiLyr1 in range(rnn_layer):
stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True, reuse=reuse))
lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
# with tf.variable_scope('rnn_def'):
dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
lstm_multi_fw_cell, x, dtype=tf.float32)
D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
D_logit = tf.matmul(D_h1, D_W2) + D_b2
D_prob = tf.nn.sigmoid(D_logit)
return D_prob, D_logit
....
D_real, D_logit_real = discriminator(X, None)
D_fake, D_logit_fake = discriminator(G_sample, True)
....
I am currently working on an AI for openai, I am trying to pass random data collected to make a model of a neural network, then use that model to create new data. When I try to make another model using the new trained data it wont let e create a new model and gives an
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]].
my code:
import gym
import random
import numpy as np
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import matplotlib.pyplot as plt
env = gym.make("CartPole-v1")
env.reset()#restarts the enviroment
epoch = 5
LR = 2e-4
max_score = 500
number_of_training_games = 100
generations = 3
training_scores = []
random_gen_score = []
def create_random_training_data():
x = 0
accepted_training_data = []
scores_and_data = []
array_of_scores = []
for i in range(number_of_training_games):
score = 0
prev_observation = []
training_data = []
for _ in range(max_score):
action = env.action_space.sample()
observation, reward, done, info = env.step(action)
if len(prev_observation) > 0:
training_data.append([prev_observation, action])
prev_observation = observation
score += reward
if done:
array_of_scores.append(score)
break
for i in training_data:
scores_and_data.append([score,i[0],i[1]])
# reset enviroment
env.reset()
training_scores = array_of_scores
for data in scores_and_data:
if data[0] > median(array_of_scores):
if data[2] == 1:
output = [0,1]
elif data[2] == 0:
output = [1,0]
accepted_training_data.append([data[1], output])
return accepted_training_data
def training_model(sample_data):
inputs = np.array([i[0] for i in sample_data]).reshape(-1,4,1)
correct_output = [i[1] for i in sample_data]
model = neural_network(input_size = len(inputs[0]))
model.fit({'input': inputs}, {'targets': correct_output}, n_epoch=epoch , snapshot_step=500, show_metric=True, run_id='openai_learning')
print(input)
return model
def neural_network(input_size):
# this is where our observation data will go
network = input_data(shape=[None, input_size, 1], name = 'input')
# our neural networks
network = fully_connected(network, 128, activation = 'relu')
#dropout is used to drop randon nodes inorder to reduce over training
network = dropout(network, 0.8)
network = fully_connected(network, 256, activation = 'relu')
network = dropout(network, 0.8)
network = fully_connected(network, 512, activation = 'relu')
network = dropout(network, 0.8)
network = fully_connected(network, 256, activation = 'relu')
network = dropout(network, 0.8)
network = fully_connected(network, 128, activation = 'relu')
network = dropout(network, 0.8)
# this is the output
network = fully_connected(network, 2, activation = 'softmax')
network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
model = tflearn.DNN(network, tensorboard_dir='log')
return model
def run_generation():
random_sample_data = []
trained_data = []
for i in range(generations):
if len(random_sample_data) ==0:
random_sample_data = create_random_training_data()
model1 = training_model(random_sample_data)
else:
trained_data = one_generation(model1)
model2 = training_model(trained_data)
return model2
def one_generation(model):
accepted_training_data = []
scores_and_data = []
array_of_scores = []
for i in range(number_of_training_games):
score = 0
prev_observation = []
training_data = []
for _ in range(max_score):
if len(prev_observation) == 0:
action = env.action_space.sample()
else:
action = np.argmax(model.predict(prev_observation.reshape(-1,len(prev_observation),1))[0])
observation, reward, done, info = env.step(action)
if len(prev_observation) > 0:
training_data.append([prev_observation, action])
prev_observation = observation
score += reward
if done:
array_of_scores.append(score)
break
for i in training_data:
scores_and_data.append([score,i[0],i[1]])
# reset enviroment
env.reset()
for data in scores_and_data:
if data[0] > median(array_of_scores):
if data[2] == 1:
output = [0,1]
elif data[2] == 0:
output = [1,0]
accepted_training_data.append([data[1], output])
return accepted_training_data
def testing():
scores = []
model = run_generation()
for _ in range(100):
score = 0
game_memory = []
prev_obs = []
env.reset()
for _ in range(max_score):
env.render()
#first move is going to be random
if len(prev_obs)==0:
action = random.randrange(0,2)
else:
action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])
#records actions
new_observation, reward, done, info = env.step(action)
prev_obs = new_observation
game_memory.append([new_observation, action])
score+=reward
if done: break
scores.append(score)
#print('Average training Score:',sum(training_scores)/len(training_scores))
print('Average Score:',sum(scores)/len(scores))
print (scores)
testing()
error:
Traceback (most recent call last):
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/anaconda/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 259, in <module>
testing()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 219, in testing
model = run_generation()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 148, in run_generation
model2 = training_model(trained_data)
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 92, in training_model
model.fit({'input': inputs}, {'targets': correct_output}, n_epoch=epoch , snapshot_step=500, show_metric=True, run_id='openai_learning')
File "/anaconda/lib/python3.6/site-packages/tflearn/models/dnn.py", line 215, in fit
callbacks=callbacks)
File "/anaconda/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 336, in fit
show_metric)
File "/anaconda/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 777, in _train
feed_batch)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'input_1/X', defined at:
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 259, in <module>
testing()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 219, in testing
model = run_generation()
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 148, in run_generation
model2 = training_model(trained_data)
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 90, in training_model
model = neural_network(input_size = len(inputs[0]))
File "/Users/Duncan/Desktop/ai projects/evolutionDNN.py", line 100, in neural_network
network = input_data(shape=[None, input_size, 1], name = 'input')
File "/anaconda/lib/python3.6/site-packages/tflearn/layers/core.py", line 81, in input_data
placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1507, in placeholder
name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1997, in _placeholder
name=name)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float
[[Node: input_1/X = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
this line:
network = input_data(shape=[None, input_size, 1], name = 'input')
should be
network = input_data(shape=[None, input_size,input_size , 1], name = 'input')
there should be 4 arguments first is taken as place holder.
Try this.