Tensorflow model doesn't reduce loss value on MNIST example - tensorflow

I'm trying Tensorflow and tried to re-write a simple MNIST example with minor changes. I expect to see a reduction in the value of loss function after running the code while this does not happen.
I compared my code to many examples but was not able to figure out the problem.
Here is my code:
import numpy as np
import tensorflow as tf
BATCH_SIZE = 100
# Data Placeholders
t = tf.placeholder(tf.bool, name='IfTrain_placeholder') # if we are in training phase
X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name='Data_placeholder')
y = tf.placeholder(dtype=tf.int32, shape=[None], name='Label_placeholder')
# Use Datasets to manage data
X_data = tf.data.Dataset.from_tensor_slices(X).batch(BATCH_SIZE)
y_data = tf.data.Dataset.from_tensor_slices(y).batch(BATCH_SIZE)
X_iter = X_data.make_initializable_iterator()
X_batch = X_iter.get_next()
y_iter = y_data.make_initializable_iterator()
y_batch = y_iter.get_next()
oh_y = tf.one_hot(indices=y_batch, depth=10)
# Model structure here
c1 = tf.layers.conv2d(inputs=X_batch,
filters=32,
kernel_size=[5,5],
padding='same',
activation=tf.nn.relu,
name='CNN1')
m1 = tf.layers.max_pooling2d(inputs=c1,
pool_size=[2,2],
strides=2,
padding='same',
name='MaxPool1')
c2 = tf.layers.conv2d(inputs=m1,
filters=64,
kernel_size=[5,5],
padding='same',
activation=tf.nn.relu,
name='CNN2')
m2 = tf.layers.max_pooling2d(inputs=c2,
pool_size=[2,2],
strides=2,
padding='same',
name='MaxPool2')
f1 = tf.reshape(tensor=m2, shape=[-1, 7*7*64], name='Flat1')
d1 = tf.layers.dense(inputs=f1,
units=1024,
activation=tf.nn.softmax,
name='Dense1')
dr1 = tf.layers.dropout(inputs=d1, rate=0.4, training=t, name='Dropout1')
d2 = tf.layers.dense(inputs=dr1,
units=10,
activation=tf.nn.softmax,
name='Dense2')
# Loss and otimization
loss = tf.losses.softmax_cross_entropy(onehot_labels=oh_y, logits=d2)
classes = tf.argmax(input=d2, axis=1, name='ArgMax1')
init = tf.global_variables_initializer()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.003, name='GD1')
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step(), name='Optimizer1')
# Get data
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
X_train = np.reshape(mnist.train.images, (-1, 28, 28, 1))
y_train = np.asarray(mnist.train.labels, dtype=np.int32)
X_test = np.reshape(mnist.test.images, (-1, 28, 28, 1))
y_test = np.asarray(mnist.test.labels, dtype=np.int32)
# Run session
with tf.Session() as sess:
sess.run(init)
sess.run(X_iter.initializer, feed_dict={X:X_train})
sess.run(y_iter.initializer, feed_dict={y:y_train})
while True:
try:
out = sess.run({'accuracy': accuracy, 'loss': loss, 'train optimizer': train_op}, feed_dict={t:True})
print(out['loss'])
except:
break
I appreciate if anyone can help me find the problem.

Related

Input shape of initial_state of tf.keras.layers.LSTM

Here I want to construct a very basic and simple character-wise RNN.
suppose that my dataset is embedded like this:
import numpy as np
batch_1 = np.array([[1, 2, ...., 20], [21, .....,40], [41,....,60], [61,...., 80]])
batch_2 = np.array([[...], [...], [...], [...]])
import tensorflow as tf
batch_size = 4
steps_number = 20
hidden_units = 100
keep_prob = 0.5
dim = tf.zeros([batch_size, hidden_units])
input_data = tf.keras.layers.Input(shape=(1, steps_number), batch_size=batch_size)
hidden_1, state_h, state_c = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=True)(input_data, initial_state=[dim, dim], training=True)
hideen_2 = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=False)(hidden_1, initial_state=[state_h, state_c], training=True)
hidden3 = tf.keras.layers.Dense(10, activation='relu')(hidden_1)
output = tf.keras.layers.Dense(1, activation='sigmoid')(hidden3)
model = tf.keras.models.Model(input_data, output)
Here I got this error in the hidden_2 layer:
ValueError: Shape (100, 4) must have rank at least 3
The problem is that the output of hidden_1 layer size should be [batch_size, steps_number, hidden_units]
Here is the working solution however, I dont I understand why I have to specify the Input shape in term of colum array:
shape=(steps_number,1) instead of (1,steps_number)
import tensorflow as tf
batch_size = 4
steps_number = 20
hidden_units = 100
keep_prob = 0.5
dim = tf.zeros([batch_size, hidden_units])
input_data = tf.keras.layers.Input(shape=(steps_number,1), batch_size=batch_size)
hidden_1, state_h, state_c = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=True, return_sequences=True)(input_data, initial_state=[dim, dim], training=True)
print(hidden_1.get_shape().as_list)
hideen_2 = tf.keras.layers.LSTM(units=hidden_units, stateful=True, dropout=keep_prob, return_state=False)(hidden_1, initial_state=[state_h, state_c], training=True)
hidden3 = tf.keras.layers.Dense(10, activation='relu')(hidden_1)
output = tf.keras.layers.Dense(1, activation='sigmoid')(hidden3)
model = tf.keras.models.Model(input_data, output)

how to get an array of predictions from tensor flow classification model

I have the following classification model.
I would like to get a numpy array similar to y_t which is the test labels one hot encoded. However I keep getting variable error.
# Construct placeholders
with graph.as_default():
inputs_ = tf.placeholder(tf.float32, [None, seq_len, n_channels], name = 'inputs')
labels_ = tf.placeholder(tf.float32, [None, n_classes], name = 'labels')
keep_prob_ = tf.placeholder(tf.float32, name = 'keep')
learning_rate_ = tf.placeholder(tf.float32, name = 'learning_rate')
with graph.as_default():
# (batch, 100, 3) --> (batch, 50, 6)
conv1 = tf.layers.conv1d(inputs=inputs_, filters=6, kernel_size=2, strides=1,
padding='same', activation = tf.nn.relu)
max_pool_1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding='same')
with graph.as_default():
# Flatten and add dropout
flat = tf.reshape(max_pool_1, (-1, 6*6))
flat = tf.nn.dropout(flat, keep_prob=keep_prob_)
# Predictions
logits = tf.layers.dense(flat, n_classes)
# Cost function and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_))
optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(cost)
# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Then I use the test set
with tf.Session(graph=graph) as sess:
# Restore
saver.restore(sess, tf.train.latest_checkpoint('bschkpnt-cnn'))
for x_t, y_t in get_batches(X_test, y_test, batch_size):
feed = {inputs_: x_t,
labels_: y_t,
keep_prob_: 1}
batch_acc = sess.run(accuracy, feed_dict=feed)
test_acc.append(batch_acc)
print("Test accuracy: {:.6f}".format(np.mean(test_acc)))
y_t is a nX3 bumpy array.
I want to get a y_pred in similar format
Thanks
soft = tf.nn.softmax(logits)
this will be your probability distribution such that sum(soft) = 1. Every value in this array will indicate how sure the model is about the class.
pred = sess.run(soft, feed_dict=feed)
print(pred)
So basically all I do is place an additional softmax, since you have it inbuilt in the loss you calculate, you've to place it again to predict. Then I ask for the output prediction, and just feed the feed_dict again.
Hope this helped!

ValueError: Cannot feed value of shape (128, 28, 28) for Tensor 'Placeholder:0', which has shape '(?, 784)'

I am new to Tensorflow and Machine Learning and trying out CNN using Tensorflow with my custom input data. But I am getting the error attached below.
The Data or Image Size is 28x28 with 15 Labels.
I am not getting the numpy reshape thing in this script or the error.
Help is highly appreciated.
import tensorflow as tf
import os
import skimage.data
import numpy as np
import random
def load_data(data_directory):
directories = [d for d in os.listdir(data_directory)
if os.path.isdir(os.path.join(data_directory, d))]
labels = []
images = []
for d in directories:
label_directory = os.path.join(data_directory, d)
file_names = [os.path.join(label_directory, f)
for f in os.listdir(label_directory)
if f.endswith(".jpg")]
for f in file_names:
images.append(skimage.data.imread(f))
labels.append(d)
print(str(d)+' Completed')
return images, labels
ROOT_PATH = "H:\Testing\TrainingData"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
print('Loading Data...')
images, labels = load_data(train_data_directory)
print('Data has been Loaded')
n_classes = 15
training_examples = 10500
test_examples = 4500
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def neural_network_model(x):
weights = {'W_Conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_Conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_FC':tf.Variable(tf.random_normal([7*7*64, 1024])),
'Output':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'B_Conv1':tf.Variable(tf.random_normal([32])),
'B_Conv2':tf.Variable(tf.random_normal([64])),
'B_FC':tf.Variable(tf.random_normal([1024])),
'Output':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1,28,28,1])
conv1 = conv2d(x, weights['W_Conv1'])
conv1 = maxpool2d(conv1)
conv2 = conv2d(conv1, weights['W_Conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2, [-1, 7*7*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_FC'])+biases['B_FC'])
output = tf.matmul(fc, weights['Output'])+biases['Output']
return output
def next_batch(num, data, labels):
idx = np.arange(0 , len(data))
np.random.shuffle(idx)
idx = idx[:num]
data_shuffle = [data[ i] for i in idx]
labels_shuffle = [labels[ i] for i in idx]
return np.asarray(data_shuffle), np.asarray(labels_shuffle)
def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
with tf.Session() as sess:
# OLD:
#sess.run(tf.initialize_all_variables())
# NEW:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(training_examples/batch_size)):
epoch_x, epoch_y = next_batch(batch_size, images, labels)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x: images, y: labels}))
print('Training Neural Network...')
train_neural_network(x)
What am I doing wrong? What is needed to be fixed and how do I fix the shape of numpy array?
If you look closely, you'll see that you have two x placeholders:
x = tf.placeholder('float', [None, 784]) # global
...
x = tf.reshape(x, shape=[-1,28,28,1]) # in neural_network_model
One of them is in the function scope, hence not visible in train_neural_network, so tensorflow takes the one with [?, 784] shape. You should get rid of one of them.
Also note that your training data has the rank 3, i.e. [batch_size, 28, 28], so it's not directly compatible with any of those placeholders.
To feed it into the first x, take epoch_x.reshape([-1, 784]). For the second placeholder (once you make it visible), take epoch_x.reshape([-1, 28, 28, 1]).

tensorflow tf.nn.bidirectional_dynamic_rnn error after tf.reshape

My tensorflow version is 1.3.0 .
My python version is 3.5.
I implement CNN followed by bid-LSTM. and I run code on CPU.
After implementing CNN, pool2's shape will be [batch_size(None), None, 106, 64]. Then tf.reshape(pool2, [-1, tf.shape(pool2)[1], tf.shape(pool2)[2]xtf.shape(pool2)[3]]) . I hope tf.reshape can reshape 4D into 3D on pool2. And then feed it bid-LSTM, but tf.nn.bidirectional_dynamic_rnn happen wrong.
It says "Input size (depth of inputs) must be accessible via shape inference," ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
I haven't found the solution to the problem for a long time. Maybe I use wrong keyword to search on Internet. Or give some right keyword to me.
x = tf.placeholder('float', shape=[None, None, 108])
y = tf.placeholder('float')
n_steps = tf.placeholder('int64')
def CNN(x):
input_layer = tf.reshape(x, [-1, tf.shape(x)[1], 108, 1])
conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 3], padding="same", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=1)
conv2 = tf.layers.conv2d(inputs=conv1, filters=64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=1)
output = tf.reshape(pool2, [-1, tf.shape(pool2)[1], tf.shape(pool2)[2]*tf.shape(pool2)[3]])
return output
def recurrent_neural_network(x):
layer1 = {'weights':tf.Variable(tf.random_normal([rnn_size*2,n_classes])),'biases':tf.Variable(tf.random_normal([n_classes]))}
lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)
lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)
outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=x, dtype=tf.float32) #[batch_size, max_time, cell_output_size]
outputs = tf.concat(outputs, 2)
max_length = tf.shape(outputs)[1]
outputs = tf.reshape(outputs, [-1, rnn_size*2])
prediction = tf.matmul(outputs,layer1['weights']) + layer1['biases']
prediction = tf.reshape(prediction, [-1, max_length, n_classes])
return prediction
def train_neural_network(x):
CNN_result = CNN(x)
prediction = recurrent_neural_network(CNN_result)
tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y) )
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
i=0
while i < len(train_X):
start = i
end = i+batch_size
batch_x = train_X[start:end]
batch_y = train_Y[start:end]
batch_sen_len = train_sen_len[start:end]
max_batch_sen_len = max(batch_sen_len)
#padding zero
for j in range(len(batch_x)):
k = max_batch_sen_len - len(batch_x[j])
for _ in range(k):
batch_x[j].append([0]*108)
batch_y[j].append([0]*48)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, n_steps: batch_sen_len})
epoch_loss += c
i+=batch_size
print('Epoch', epoch+1, 'completed out of',hm_epochs,'loss:',epoch_loss)

Output of loss is None

I have to finetune VGG.There are five convolutional layers and then three fully connected layers. Output from the last fully connected layer is the input of the loss function. Following is my code:
class vgg16:
def __init__(self, imgs1,imgs2, weights=None, sess=None):
self.imgs1 = imgs1
self.imgs2 = imgs2
with tf.variable_scope("siamese") as scope:
self.o1 = self.convlayers(imgs1)
self.fc_layers()
self.loss()
if weights is not None and sess is not None:
self.load_weights(weights, sess)
scope.reuse_variables()
self.o2 = self.convlayers(imgs2)
self.fc_layers()
self.loss()
if weights is not None and sess is not None:
self.load_weights(weights, sess)
#create loss function
def convlayers(self,imgs):
....
# conv1_2
with tf.name_scope('conv1_2') as scope:
......
# pool1
..
)
.....
# pool5
self.pool5 = tf.nn.max_pool(self.conv5_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
def fc_layers(self):
# fc1
with tf.name_scope('fc1') as scope:
....
# fc2
with tf.name_scope('fc2') as scope:
...
# fc3
with tf.name_scope('fc3') as scope:
fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
trainable=True, name='biases')
self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
def load_weights(self, weight_file, sess):
weights = np.load(weight_file)
keys = sorted(weights.keys())
for i, k in enumerate(keys):
print i, k, np.shape(weights[k])
sess.run(self.parameters[i].assign(weights[k]))
def loss(self):
loss=tf.nn.l2_loss(self.fc3l)
self.train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
if __name__ == '__main__':
sess = tf.Session()
imgs1 = tf.placeholder(tf.float32, [None, 224, 224, 3])#jis size ka bhi imaeg hai usko 224x224 may kar diya or RGB chaeay hmay
imgs2 = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg = vgg16(imgs1,imgs2, 'vgg16_weights.npz', sess)
img1 = imread('laska.png', mode='RGB')
img1 = imresize(img1, (224, 224))
img2 = imread('laska2.jpg', mode='RGB')
img2 = imresize(img2,(224, 224))
prob = sess.run(vgg.train_step, feed_dict={vgg.imgs1: [img1],vgg.imgs2: [img2]})
print('loss is:')
print(prob)
The problem is that the output of prob is None. Kindly indicate what I am doing wrong.
PS: I am following siamese architecture. Input to both branches are different images here.
The op self.train_step does not return anything, it just calculates gradients and updates variables. See here.
What you need to do is to save reference to loss tensor in your vgg16 class like this:
self.loss=tf.nn.l2_loss(self.fc3l)
and then execute both train_step and loss operations in single sess.run:
_, loss_value = sess.run([vgg.train_step, vgg.loss], feed_dict=...)
print('loss is:')
print(loss_value)