Error when using tf.compat.v1.nn.rnn_cell.LSTMCell in TensorFlow 2.0 - tensorflow

I am trying to use an LSTM with projection in TensorFlow 2.0.
I am writing a custom model, but I am getting this error:
ValueError: Dimensions must be equal, but are 4096 and 2288 for 'transducer/rnn_model/encoder_lstm1/MatMul' (op: 'MatMul') with input shapes: [4,4096], [2288,8192].
Below is the code:
class RNNModel(tf.keras.Model):
    def __init__(self, input_size, vocab_size, hidden_size=2048, num_layers=8, dropout=.2, blank=0, bidirectional=False):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.blank = blank
        self.lstm_layers = []
        self.proj_layers = []
        self.lstm_proj_layers = []
        self.batch_norm_layers = []
        self.proj_dim = 640
        rnn_size = hidden_size
        output_dim = rnn_size
        self.cell = tf.compat.v1.nn.rnn_cell.LSTMCell(2048, num_proj=2048)
        for i in range(self.num_layers):
            name1 = 'encoder_lstm' + str(i)
            self.lstm_layers.append(tf.keras.layers.RNN(self.cell, return_sequences=True, name=name1))

    def reshape_pyramidal(self, outputs):
        shape = tf.shape(outputs)
        batch_size, max_time = shape[0], shape[1]
        num_units = outputs.get_shape().as_list()[-1]
        pads = [[0, 0], [0, tf.math.floormod(max_time, 2)], [0, 0]]
        outputs = tf.pad(outputs, pads)
        concat_outputs = tf.reshape(outputs, (batch_size, -1, num_units * 2))
        return concat_outputs

    def call(self, x, xlen):
        for i in range(self.num_layers):
            output = self.lstm_layers[i](inputs=x)
            x = output
        return x, state_h
Following is the full error traceback:
train_proj.py:92 train_step *
xs, _ = self.encoder(xs_1, xlen)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/home/ubuntu/E2E-ASR/model_proj.py:84 call *
output = self.lstm_layers[i](inputs=x)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/recurrent.py:623 __call__
return super(RNN, self).__call__(inputs, **kwargs)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/recurrent.py:756 call
zero_output_for_mask=self.zero_output_for_mask)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py:4035 rnn
input_time_zero, tuple(initial_states) + tuple(constants))
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/recurrent.py:732 step
output, new_states = self.cell.call(inputs, states, **kwargs)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/ops/rnn_cell_impl.py:1028 call
array_ops.concat([inputs, m_prev], 1), self._kernel)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/util/dispatch.py:180 wrapper
return target(*args, **kwargs)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/ops/math_ops.py:2765 matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_math_ops.py:6136 mat_mul
name=name)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py:793 _apply_op_helper
op_def=op_def)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py:548 create_op
compute_device)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:3429 _create_op_internal
op_def=op_def)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1773 __init__
control_input_ops)
/home/ubuntu/tf2/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1613 _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 4096 and 2288 for 'transducer/rnn_model/encoder_lstm1/MatMul' (op: 'MatMul') with input shapes: [4,4096], [2288,8192].

Related

TypeError: Can not convert a NoneType into a Tensor or Operation -- error believed to be related to converting to graph

Below is my model:
import tensorflow as tf
from tensorflow.keras.layers import Add, Concatenate, Dense, TimeDistributed
from tensorflow.python.keras.engine import data_adapter

class CustomModel(tf.keras.Model):
    def __init__(self, model1, model2, model3, model4):
        super(CustomModel, self).__init__()
        self.model1 = model1
        self.model2 = model2
        self.model3 = model3
        self.model4 = model4

    def call(self, inputs):
        x1 = self.model1([inputs["a"], inputs["b"]])
        x2 = self.model2([inputs["a"], inputs["b"]])
        x3 = self.model3([inputs["a"], inputs["b"]])
        x4 = self.model4([inputs["a"], inputs["b"]])
        x = Concatenate()([x1, x2, x3])
        x = TimeDistributed(Dense(2))(x)
        x = Add()([x, x4])
        x_fc = Dense(1)(x)
        x_ec = Dense(1)(x)
        return x_fc, x_ec

    def train_step(self, data):
        with tf.GradientTape() as tape:
            data = data_adapter.expand_1d(data)
            batch_inputs, batch_outputs, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
            y_true_fc, y_true_ec = batch_outputs["y_fc"], batch_outputs["y_ec"]
            y_pred_fc, y_pred_ec = self(batch_inputs, training=True)
            loss_fc = self.compiled_loss(y_true_fc, y_pred_fc)
            loss_ec = self.compiled_loss(y_true_ec, y_pred_ec)
        print("here")
        trainable_variables = self.trainable_variables
        print("here")
        gradients = tape.gradient([loss_fc, loss_ec], trainable_variables)
        print("here")
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
        print("here")
And below is my custom loss
class CustomLoss(tf.keras.losses.Loss):
    def __init__(self, mask=True, alpha=1, beta=1, gamma=1, dtype=tf.float64):
        super(CustomLoss, self).__init__(reduction=tf.keras.losses.Reduction.NONE)
        self.mask = mask
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.dtype = dtype

    def call(self, y_true, y_pred):
        def loss_fn(y_true, y_pred, mask):
            y_true = tf.boolean_mask(y_true, mask)
            y_pred = tf.boolean_mask(y_pred, mask)
            return tf.keras.losses.MSE(y_true, y_pred)

        self.mask = tf.not_equal(y_true, 0.)
        y_true = tf.cast(y_true, self.dtype)
        y_pred = tf.cast(y_pred, self.dtype)
        y_pred = tf.multiply(y_pred, tf.cast(self.mask, dtype=self.dtype))
        y_pred_cum = tf.math.cumsum(y_pred, axis=1)
        y_pred_cum = tf.multiply(y_pred_cum, tf.cast(self.mask, dtype=self.dtype))
        y_true_cum = tf.math.cumsum(y_true, axis=1)
        y_true_cum = tf.multiply(y_true_cum, tf.cast(self.mask, dtype=self.dtype))
        loss_value = self.alpha * loss_fn(y_true, y_pred, self.mask) + \
                     self.gamma * loss_fn(y_true_cum, y_pred_cum, self.mask)
        return loss_value
And then finally:
optimizer = tf.keras.optimizers.Adam()
loss = CustomLoss()
model.compile(optimizer, loss)
model.fit(train_data, epochs=5, validation_data=val_data)
My data inputs are of size (sequence length, feature length), where the sequence length is variable, hence I am using tf.data.experimental.bucket_by_sequence_length to pad each batch to its own max sequence length (as opposed to padding every batch to the global max sequence length). All in all, my train and val data are tf.data.Datasets, each created using tf.data.experimental.bucket_by_sequence_length, where each batch is of size (None, None, feature length).
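For reference, a minimal sketch of how such bucketed batches can be built (hypothetical data and bucket settings, not my actual pipeline):

import numpy as np
import tensorflow as tf

feature_len = 8  # assumed feature length

def gen():
    # variable-length (sequence length, feature length) examples
    for n in (5, 12, 30):
        yield np.random.rand(n, feature_len).astype(np.float32)

ds = tf.data.Dataset.from_generator(
    gen, output_types=tf.float32, output_shapes=(None, feature_len))

# bucket_batch_sizes needs one entry more than bucket_boundaries;
# padded_shapes=(None, feature_len) pads only to each batch's max length
ds = ds.apply(tf.data.experimental.bucket_by_sequence_length(
    element_length_func=lambda x: tf.shape(x)[0],
    bucket_boundaries=[10, 20],
    bucket_batch_sizes=[4, 4, 4],
    padded_shapes=(None, feature_len)))
# each batch now has shape (None, None, feature_len)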
When I run my code, I get the following error and cannot seem to understand where I am going wrong:
Traceback (most recent call last):
File "<input>", line 75, in <module>
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\def_function.py", line 725, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\eager\def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
TypeError: in user code:
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function *
return step_function(self, iterator)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\keras\engine\training.py:790 run_step **
with ops.control_dependencies(_minimum_control_deps(outputs)):
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\ops.py:5359 control_dependencies
return get_default_graph().control_dependencies(control_inputs)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\func_graph.py:362 control_dependencies
return super(FuncGraph, self).control_dependencies(filtered_control_inputs)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\ops.py:4815 control_dependencies
c = self.as_graph_element(c)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\ops.py:3726 as_graph_element
return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
C:\Users\\Anaconda3\envs\tf_recsys\lib\site-packages\tensorflow\python\framework\ops.py:3814 _as_graph_element_locked
raise TypeError("Can not convert a %s into a %s." %
TypeError: Can not convert a NoneType into a Tensor or Operation.
The four print statements inserted in the train_step function above are printed.
This NoneType refers to the returned value of the custom train_step. When using a custom train_step, you should return something that can be converted into a tensor so that the minimum control dependencies can process it: typically the loss value, as {"loss": loss_value}, potentially together with some other metrics, or at least an empty dict {}.
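For example, a minimal sketch of that fix for the train_step above (keeping the question's names):

    def train_step(self, data):
        ...  # body unchanged from the question
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
        # return a dict of tensors so Keras' minimum control dependencies
        # have something to convert, e.g. the two loss values:
        return {"loss_fc": loss_fc, "loss_ec": loss_ec}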

InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]

I have been getting this error and I can't figure out the reason. If anyone could help, that would be great.
This is my code:
import numpy as np
import pickle
import os
import download
#from dataset import one_hot_encoded
#from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from random import shuffle
data_path = "D:/Personal details/Internship/"
# Width and height of each image.
img_size = 32
# Number of channels in each image, 3 channels: Red, Green, Blue.
num_channels = 3
# Length of an image when flattened to a 1-dim array.
img_size_flat = img_size * img_size * num_channels
# Number of classes.
num_classes = 10
# Number of files for the training-set.
_num_files_train = 5
# Number of images for each batch-file in the training-set.
_images_per_file = 10000
# Total number of images in the training-set (used by load_training_data).
_num_images_train = _num_files_train * _images_per_file
def _get_file_path(filename=""):
    return os.path.join(data_path, "cifar-10-batches-py/", filename)

def _unpickle(filename):
    file_path = _get_file_path(filename)
    print("Loading data: " + file_path)
    with open(file_path, mode='rb') as file:
        # In Python 3.X it is important to set the encoding,
        # otherwise an exception is raised here.
        data = pickle.load(file, encoding='bytes')
    return data
def _convert_images(raw):
    # Convert the raw images from the data-files to floating-points.
    raw_float = np.array(raw, dtype=float) / 255.0
    # Reshape the array to 4-dimensions.
    images = raw_float.reshape([-1, num_channels, img_size, img_size])
    # Reorder the indices of the array.
    images = images.transpose([0, 2, 3, 1])
    return images

def _load_data(filename):
    # Load the pickled data-file.
    data = _unpickle(filename)
    # Get the raw images.
    raw_images = data[b'data']
    # Get the class-numbers for each image. Convert to numpy-array.
    cls = np.array(data[b'labels'])
    # Convert the images.
    images = _convert_images(raw_images)
    return images, cls

def load_class_names():
    # Load the class-names from the pickled file.
    raw = _unpickle(filename="batches.meta")[b'label_names']
    # Convert from binary strings.
    names = [x.decode('utf-8') for x in raw]
    return names
def load_training_data():
    images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float)
    cls = np.zeros(shape=[_num_images_train], dtype=int)
    # Begin-index for the current batch.
    begin = 0
    # For each data-file.
    for i in range(_num_files_train):
        # Load the images and class-numbers from the data-file.
        images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1))
        # Number of images in this batch.
        num_images = len(images_batch)
        # End-index for the current batch.
        end = begin + num_images
        # Store the images into the array.
        images[begin:end, :] = images_batch
        # Store the class-numbers into the array.
        cls[begin:end] = cls_batch
        # The begin-index for the next batch is the current end-index.
        begin = end
    return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)

def load_test_data():
    images, cls = _load_data(filename="test_batch")
    return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
########################################################################
def one_hot_encoded(class_numbers, num_classes=None):
    if num_classes is None:
        num_classes = np.max(class_numbers) + 1
    return np.eye(num_classes, dtype=float)[class_numbers]
class_names = load_class_names()
images_train, cls_train, labels_train = load_training_data()
images_test, cls_test, labels_test = load_test_data()
images_train_train = images_train[0:45000]
validation_train = images_train[45000:50000]
labels_train_train = labels_train[0:45000]
validation_labels = labels_train[45000:]
print(len(images_train_train))
print(len(validation_train))
##print(class_names)
##print(len(images_train))
##print(cls_train)
##print(labels_train)
##print(cls_test)
##print(labels_test)
n_classes = len(class_names)
batch_size = 128
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3], name='x')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='y_true')
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([3,3,3,64])),
               'W_conv2': tf.Variable(tf.random_normal([3,3,64,128])),
               'W_conv3': tf.Variable(tf.random_normal([3,3,128,256])),
               'W_conv4': tf.Variable(tf.random_normal([3,3,256,256])),
               'W_fc1': tf.Variable(tf.random_normal([256,1024])),
               'W_fc2': tf.Variable(tf.random_normal([1024,1024])),
               'soft_max': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([64])),
              'b_conv2': tf.Variable(tf.random_normal([128])),
              'b_conv3': tf.Variable(tf.random_normal([256])),
              'b_conv4': tf.Variable(tf.random_normal([256])),
              'b_fc1': tf.Variable(tf.random_normal([1024])),
              'b_fc2': tf.Variable(tf.random_normal([1024])),
              'soft_max': tf.Variable(tf.random_normal([n_classes]))}
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)
    conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['b_conv3'])
    conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['b_conv4'])
    conv4 = maxpool2d(conv4)
    fc1 = tf.reshape(conv4, [256,-1])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
    fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2']) + biases['b_fc2'])
    soft_max = tf.matmul(fc2, weights['soft_max']) + biases['soft_max']
    return soft_max
def train_neural_network(x):
    prediction = convolutional_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 3
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            while i < len(images_train_train):
                start = i
                end = i + batch_size
                batch_x = np.array(images_train_train[start:end])
                batch_y = np.array(labels_train_train[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                epoch_loss += c
                i += batch_size  # advance to the next batch
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: validation_train, y: validation_labels}))

train_neural_network(x)
And this is the error I have been getting:
WARNING:tensorflow:From D:/Personal details/Internship/cifar-10v1.0.py:310: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.
See @{tf.nn.softmax_cross_entropy_with_logits_v2}.
WARNING:tensorflow:From C:\Python35\lib\site-packages\tensorflow\python\util\tf_should_use.py:118: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
Traceback (most recent call last):
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/Personal details/Internship/cifar-10v1.0.py", line 344, in <module>
train_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 327, in train_neural_network
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
run_metadata_ptr)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
Caused by op 'MatMul', defined at:
File "<string>", line 1, in <module>
File "C:\Python35\lib\idlelib\run.py", line 130, in main
ret = method(*args, **kwargs)
File "C:\Python35\lib\idlelib\run.py", line 357, in runcode
exec(code, self.locals)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 344, in <module>
train_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 309, in train_neural_network
prediction = convolutional_neural_network(x)
File "D:/Personal details/Internship/cifar-10v1.0.py", line 300, in convolutional_neural_network
fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1'])
File "C:\Python35\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2122, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 4567, in mat_mul
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Matrix size-incompatible: In[0]: [256,2048], In[1]: [256,1024]
[[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape, Variable_4/read)]]
It looks like the problem is in the convolutional_neural_network() function, where it somehow fails at multiplying matrices with incompatible dimensions. But it is not clear how to solve the issue.
Thank you for the help in advance...
After reshaping conv4 at the line fc1 = tf.reshape(conv4, [256,-1]), the shape of fc1 is (256, 2048), while the weight matrix W_fc1 has shape (256, 1024). Thus, you get a size-incompatible error at the next line, fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1']) + biases['b_fc1']),
in the matrix multiplication part. I suggest you go through the dimensions at every step manually to find errors in the future.
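As a sketch of one possible fix (assuming the 32x32 input above: three 2x2 max-pools reduce it to 4x4 with 256 channels, i.e. 4*4*256 = 4096 features per image):

    # flatten by feature count instead of hard-coding the batch size
    fc1 = tf.reshape(conv4, [-1, 4 * 4 * 256])

and size the first fully connected weight matrix to match:

    'W_fc1': tf.Variable(tf.random_normal([4 * 4 * 256, 1024]))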

Tensorflow: value error with variable_scope in LSTM

This is my TensorFlow code to train a GAN. I am training the discriminator to be able to distinguish between fake and original video. I have omitted the parts of the code that are not relevant to the error, to avoid Stack Overflow's mostly-code restriction.
X = tf.placeholder(tf.float32, shape=[None, 28, 28])
D_W1 = tf.Variable(xavier_init([1024, 128]))
D_b1 = tf.Variable(tf.zeros(shape=[128]))
D_W2 = tf.Variable(xavier_init([128, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))
theta_D = [D_W1, D_W2, D_b1, D_b2]
rnn_size = 1024
rnn_layer = 2
Z = tf.placeholder(tf.float32, shape=[None, 100])
G_W1 = tf.Variable(xavier_init([100, 128]))
G_b1 = tf.Variable(tf.zeros(shape=[128]))
G_W2 = tf.Variable(xavier_init([128, 784]))
G_b2 = tf.Variable(tf.zeros(shape=[784]))
theta_G = [G_W1, G_W2, G_b1, G_b2]
def sample_Z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

def generator(z):
    G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob)
    G_prob = tf.reshape(G_prob, [-1, 28, 28])
    return G_prob

def discriminator(x):
    x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
    # with tf.variable_scope('cell_def'):
    stacked_rnn1 = []
    for iiLyr1 in range(rnn_layer):
        stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True))
    lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
    # with tf.variable_scope('rnn_def'):
    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
        lstm_multi_fw_cell, x, dtype=tf.float32)
    D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
    D_logit = tf.matmul(D_h1, D_W2) + D_b2
    D_prob = tf.nn.sigmoid(D_logit)
    return D_prob, D_logit
G_sample = generator(Z)
print(G_sample.get_shape())
print(X.get_shape())
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)
D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
G_loss = -tf.reduce_mean(tf.log(D_fake))
summary_d = tf.summary.histogram('D_loss histogram', D_loss)
summary_g = tf.summary.histogram('D_loss histogram', G_loss)
summary_s = tf.summary.scalar('D_loss scalar', D_loss)
summary_s1 = tf.summary.scalar('scalar scalar', G_loss)
# Add image summary
summary_op = tf.summary.image("plot", image)
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
mb_size = 128
Z_dim = 100
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
# merged_summary_op = tf.summary.merge_all()
sess = tf.Session()
saver = tf.train.Saver()
writer1 = tf.summary.FileWriter('log/log-sample1', sess.graph)
writer2 = tf.summary.FileWriter('log/log-sample2', sess.graph)
sess.run(tf.global_variables_initializer())
if not os.path.exists('out/'):
    os.makedirs('out/')

i = 0
with tf.variable_scope("myrnn") as scope:
    for it in range(5000):
        X_mb, _ = mnist.train.next_batch(mb_size)
        X_mb = tf.reshape(X_mb, [mb_size, -1, 28])
        _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
        _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})
        summary_str, eded = sess.run([summary_d, summary_s], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
        writer1.add_summary(summary_str, it)
        writer1.add_summary(eded, it)
        summary_str1, eded1 = sess.run([summary_g, summary_s1], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
        writer2.add_summary(summary_str1, it)
        writer2.add_summary(eded1, it)
        if it % 1000 == 0:
            print('Iter: {}'.format(it))
            print('D loss: {:.4}'.format(D_loss_curr))
            print('G_loss: {:.4}'.format(G_loss_curr))
            print()
    save_path = saver.save(sess, "tmp/model.ckpt")
writer1.close()
writer2.close()
Following is the error when I run this code. Please help.
Traceback (most recent call last):
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 104, in <module>
D_fake, D_logit_fake = discriminator(G_sample)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1212, in static_rnn
(output, state) = call_cell()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 664, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 64, in discriminator
lstm_multi_fw_cell, x, dtype=tf.float32)
File "/Users/tulsijain/Desktop/Deep Learning Practise/GAN/vanila.py", line 103, in <module>
D_real, D_logit_real = discriminator(X)
It is a GAN. I am using MNIST data to train the generator and discriminator.
Add a reuse parameter to the BasicLSTMCell. Since you are calling the discriminator function twice with reuse=None both times, it throws the error when it tries to create variables with the same name. In this context you need to reuse the variables from the graph for the second call, as you don't need to create a new set of variables.
def discriminator(x, reuse):
    x = [tf.squeeze(t, [1]) for t in tf.split(x, 28, 1)]
    # with tf.variable_scope('cell_def'):
    stacked_rnn1 = []
    for iiLyr1 in range(rnn_layer):
        stacked_rnn1.append(tf.nn.rnn_cell.BasicLSTMCell(num_units=rnn_size, state_is_tuple=True, reuse=reuse))
    lstm_multi_fw_cell = tf.contrib.rnn.MultiRNNCell(cells=stacked_rnn1)
    # with tf.variable_scope('rnn_def'):
    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
        lstm_multi_fw_cell, x, dtype=tf.float32)
    D_h1 = tf.nn.relu(tf.matmul(dec_outputs[-1], D_W1) + D_b1)
    D_logit = tf.matmul(D_h1, D_W2) + D_b2
    D_prob = tf.nn.sigmoid(D_logit)
    return D_prob, D_logit

....

D_real, D_logit_real = discriminator(X, None)
D_fake, D_logit_fake = discriminator(G_sample, True)

....

Trying to add a CNN to an MLP Siamese network

I'm getting an incompatible shape error when trying to add a CNN to an existing Siamese network code that I got from GitHub. Here is the link:
https://github.com/ywpkwon/siamese_tf_mnist
Here is the code for running the session:
""" Siamese implementation using Tensorflow with MNIST example.
This siamese network embeds a 28x28 image (a point in 784D)
into a point in 2D.
By Youngwook Paul Kwon (young at berkeley.edu)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#import system things
from tensorflow.examples.tutorials.mnist import input_data # for data
import tensorflow as tf
import numpy as np
import os
#import helpers
import inference
import visualize
# prepare data and tf.session
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
sess = tf.InteractiveSession()
# setup siamese network
siamese = inference.siamese();
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(siamese.loss)
saver = tf.train.Saver()
tf.initialize_all_variables().run()
# start training
if new:
    for step in range(1000):
        batch_x1, batch_y1 = mnist.train.next_batch(128)
        batch_x2, batch_y2 = mnist.train.next_batch(128)
        batch_y = (batch_y1 == batch_y2).astype('float')
        _, loss_v = sess.run([train_step, siamese.loss], feed_dict={
            siamese.x1: batch_x1,
            siamese.x2: batch_x2,
            siamese.y_: batch_y})
        if step % 10 == 0:
            print('step %d: loss' % (step))
            print(loss_v)
Here is the code for creating the Siamese model:
import tensorflow as tf

class siamese:
    # Create model
    def __init__(self):
        self.x1 = tf.placeholder(tf.float32, [None, 784])
        self.x2 = tf.placeholder(tf.float32, [None, 784])
        with tf.variable_scope("siamese") as scope:
            self.o1 = self.network(self.x1)
            scope.reuse_variables()
            self.o2 = self.network(self.x2)
        # Create loss
        self.y_ = tf.placeholder(tf.float32, [None])
        self.loss = self.loss_with_step()

    def network(self, x):
        weights = []
        fc1 = self.fc_layer(x, 1024, "fc1", [5, 5, 1, 32])
        return fc1

    def fc_layer(self, bottom, n_weight, name, kernel_shape):  # [5, 5, 1, 32]
        assert len(bottom.get_shape()) == 2
        # n_prev_weight = bottom.get_shape()[1]
        initer = tf.truncated_normal_initializer(stddev=0.01)
        weights_for_convolution = tf.get_variable(name+"weights_for_convolution", kernel_shape,
            initializer=tf.random_normal_initializer())
        bias_shape = kernel_shape[-1]
        biases_for_convolution = tf.get_variable(name+"biases_for_convolution", [bias_shape],
            initializer=tf.constant_initializer(0.1))
        biases_for_connected_layer = tf.get_variable(name+"biases_for_connected_layer", [1024],
            initializer=tf.constant_initializer(0.1))
        weights_for_connected_layer = tf.get_variable(name+"weights_for_connected_layer", [7*7*64,1024],
            initializer=tf.random_normal_initializer())
        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[1024,2], initializer=initer)
        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[2], dtype=tf.float32))
        # weights_for_readout_layer = tf.get_variable("weights_for_readout_layer", [1024,2],
        #     initializer=tf.random_normal_initializer())
        # biases_for_readout_layer = tf.get_variable("biases_for_readout_layer", [2],
        #     initializer=tf.constant_initializer(0.1))
        bottom1 = tf.reshape(bottom, [-1,28,28,1])
        c2 = tf.nn.conv2d(bottom1, weights_for_convolution, strides=[1, 1, 1, 1], padding='SAME')
        conv = tf.nn.bias_add(c2, biases_for_convolution)
        relu = tf.nn.relu(conv)
        out = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1], padding='SAME')
        # print tf.shape(out)
        h_out_flat = tf.reshape(out, [-1,7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_out_flat, weights_for_connected_layer) + biases_for_connected_layer)
        # compute model output
        final_output = tf.matmul(h_fc1, W) + b
        # fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
        return final_output

    def loss_with_spring(self):
        margin = 5.0
        labels_t = self.y_
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")  # labels_ = !labels;
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        print tf.shape(eucd2)
        eucd2 = tf.reduce_sum(eucd2, 1)
        eucd = tf.sqrt(eucd2+1e-6, name="eucd")
        C = tf.constant(margin, name="C")
        # yi*||CNN(p1i)-CNN(p2i)||^2 + (1-yi)*max(0, C-||CNN(p1i)-CNN(p2i)||^2)
        pos = tf.multiply(labels_t, eucd2, name="yi_x_eucd2")
        # neg = tf.multiply(labels_f, tf.subtract(0.0,eucd2), name="yi_x_eucd2")
        # neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C,eucd2)), name="Nyi_x_C-eucd_xx_2")
        neg = tf.multiply(labels_f, tf.pow(tf.maximum(tf.subtract(C, eucd), 0), 2), name="Nyi_x_C-eucd_xx_2")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss

    def loss_with_step(self):
        margin = 5.0
        labels_t = self.y_  # 128
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")  # labels_ = !labels;
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        eucd2 = tf.reduce_sum(eucd2, 1)
        eucd = tf.sqrt(eucd2+1e-6, name="eucd")
        C = tf.constant(margin, name="C")
        pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
        neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd)), name="Ny_C-eucd")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss
Actually, as the batch size is 128, labels_t is of size 128.
The problem here is that the Euclidean distance in the loss_with_step function,
as well as in the loss_with_spring function, is of size 256 and not 128, and I don't really know why!
Here is the error I get:
Traceback (most recent call last):
File "run1.py", line 56, in <module>
siamese.y_: batch_y})
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
Caused by op u'y_x_eucd', defined at:
File "run1.py", line 28, in <module>
siamese = inference1.siamese();
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master/inference1.py", line 18, in __init__
self.loss = self.loss_with_step()
File "/home/sudonuma/Documents/siamese for mnist/siamese_tf_mnist-master/inference1.py", line 110, in loss_with_step
pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 286, in multiply
return gen_math_ops._mul(x, y, name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1377, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/sudonuma/anaconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [128] vs. [256]
[[Node: y_x_eucd = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_Placeholder_2_0_2, eucd)]]
Can anyone help?
Looks like your reshaping after the convolution is wrong. The output of the convolution layer would be 14x14x32 for a 28x28x1 input passed through conv (stride 1) and max-pool (stride 2). So you need to change the flatten layer to:
h_out_flat = tf.reshape(out, [-1, 14*14*32])
and also adjust the weights_for_connected_layer appropriately.
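For concreteness, the matching weight definition would then be sized to the new flattened length (a sketch; the 1024 hidden size comes from the question's code):

# flattened convolution output now has 14*14*32 features
weights_for_connected_layer = tf.get_variable(name+"weights_for_connected_layer", [14*14*32, 1024],
    initializer=tf.random_normal_initializer())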

MomentumOptimizer error: Attempting to use uninitialized value Variable_2/Momentum

I'm learning TensorFlow. I was trying tf.train.MomentumOptimizer but I got the following error:
Traceback (most recent call last):
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 70, in run
self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:@Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
Caused by op u'Momentum/update_Variable_2/ApplyMomentum', defined at:
File "relu.py", line 98, in <module>
learner.run(stop=0.01, print_epoch=True)
File "relu.py", line 55, in run
train_step = self.optimizer.minimize(self.cross_entropy)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 289, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 413, in apply_gradients
update_ops.append(processor.update_op(self, grad))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 61, in update_op
return optimizer._apply_dense(g, self._v) # pylint: disable=protected-access
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/momentum.py", line 69, in _apply_dense
use_nesterov=self._use_nesterov).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 348, in apply_momentum
use_nesterov=use_nesterov, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable_2/Momentum
[[Node: Momentum/update_Variable_2/ApplyMomentum = ApplyMomentum[T=DT_FLOAT, _class=["loc:@Variable_2"], use_locking=false, use_nesterov=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_2, Variable_2/Momentum, Momentum/learning_rate, gradients/add_1_grad/tuple/control_dependency_1, Momentum/momentum)]]
And the following is my code:
import time
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

class ReluMnistNet:
    def __init__(self, optimizer=None):
        self.varlist = []
        self.optimizer = optimizer or tf.train.GradientDescentOptimizer(0.01)
        # fetch dataset
        self.mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
        # prepare environment
        layers = [ 100 ]
        input_layer = 784
        output_layer = 10
        self.x = tf.placeholder(tf.float32, [None, input_layer])
        last_layer = input_layer
        y = self.x
        for layer in layers:
            b = tf.Variable(tf.zeros([layer]))
            self.varlist.append(b)
            W = tf.Variable(tf.random_normal([last_layer, layer], stddev=0.01))
            self.varlist.append(W)
            y = tf.nn.relu(tf.matmul(y, W)) + b
            last_layer = layer
        b = tf.Variable(tf.zeros([output_layer]))
        self.varlist.append(b)
        W = tf.Variable(tf.random_normal([last_layer, output_layer], stddev=0.01))
        self.varlist.append(W)
        self.y = tf.matmul(y, W) + b
        self.y_ = tf.placeholder(tf.float32, [None, 10])
        self.cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_))

    def prepare(self):
        # init = tf.initialize_variables(self.varlist)
        init = tf.initialize_all_variables()
        self.sess = tf.Session()
        self.sess.run(init)

    def run(self, batch_size=100, stop=0.001, print_epoch=False):
        mnist = self.mnist
        data_size = mnist.train.images.shape[0]
        last_accuracy = 0
        accuracy_history = []
        train_step = self.optimizer.minimize(self.cross_entropy)
        time1 = time.time()
        for i in range(10000):
            for j in range(data_size / batch_size):
                # random batch
                batch_idx = np.arange(data_size)
                np.random.shuffle(batch_idx)
                batch_idx = batch_idx[0:batch_size]
                batch_xs = mnist.train.images[batch_idx]
                batch_ys = mnist.train.labels[batch_idx]
                # ordered batch
                # start = j * batch_size
                # end = (j+1) * batch_size
                # batch_xs, batch_ys = mnist.train.images[start:end], mnist.train.labels[start:end]
                self.sess.run(train_step, feed_dict={self.x: batch_xs, self.y_: batch_ys})
            # test the accuracy
            correct_prediction = tf.equal(tf.argmax(self.y, 1), tf.argmax(self.y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            accuracy = self.sess.run(accuracy, feed_dict={self.x: mnist.test.images, self.y_: mnist.test.labels})
            accuracy_history.append(accuracy)
            if print_epoch:
                print i, accuracy
            if last_accuracy != 0 and abs(last_accuracy - accuracy) < stop:
                break
            last_accuracy = accuracy
        time2 = time.time()
        return accuracy_history, (time2 - time1)

    def close(self):
        if not (self.sess is None):
            self.sess.close()
            self.sess = None

if __name__ == '__main__':
    learner = ReluMnistNet()
    # learner.optimizer = tf.train.GradientDescentOptimizer(0.01)
    learner.optimizer = tf.train.MomentumOptimizer(0.01, momentum=0.9)
    for i in range(10):
        learner.prepare()
        learner.run(stop=0.01, print_epoch=True)
        learner.close()
It seems like a variable named Momentum is uninitialized? However, by calling learner.prepare(), I have called tf.initialize_all_variables(). What's more, I have no variable named Momentum. Why does this happen?
In your code you are calling minimize after initializing the global variables.
Instead, you have to do:
self.cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_) )
self.optimize = self.optimizer.minimize(self.cross_entropy)
and in the run function, instead of
train_step = self.optimizer.minimize(self.cross_entropy)
you should call
train_step = self.optimize
P.S.
Momentum is the default name for the MomentumOptimizer, which is why the uninitialized variable is called Variable_2/Momentum: minimize creates these slot variables, and they did not exist yet when the initializer ran.
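Applied to the class above, a minimal sketch of that reordering (only the changed lines are shown):

class ReluMnistNet:
    def __init__(self, optimizer=None):
        # ... unchanged setup ...
        self.cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.y, labels=self.y_))
        # build the train op here, so the Momentum slot variables exist
        # before prepare() runs tf.initialize_all_variables()
        self.optimize = self.optimizer.minimize(self.cross_entropy)

    def run(self, batch_size=100, stop=0.001, print_epoch=False):
        # ... unchanged ...
        train_step = self.optimize  # reuse the pre-built op; do not call minimize here

Note that with this change the optimizer has to be passed to the constructor, e.g. ReluMnistNet(optimizer=tf.train.MomentumOptimizer(0.01, momentum=0.9)), rather than assigned afterwards, since minimize is now called during construction.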