Tensorflow passing image to simple MNIST Data Model - tensorflow

I have a simple model for MNIST data classification with an accuracy of around 92%.
I would like to know if there is any way I can provide an image of a digit and get the label for that digit as output. The image can be taken from the MNIST test data, rather than being a custom image, just to avoid image preprocessing. Below is the code for my model.
Thanks
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Start from an empty default graph.
tf.reset_default_graph()

# Hyper-parameters and TensorBoard log directory.
learning_rate = 0.5
batch_size = 100
training_epochs = 5
logs_path = "/tmp/mnist/2"

# Load the MNIST data set with one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, shape=[None, 784], name="image-input")
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name="labels-input")

# Weights
with tf.name_scope("weights"):
    W = tf.Variable(tf.zeros([784, 10]))

# Biases
with tf.name_scope("biases"):
    b = tf.Variable(tf.zeros([10]))

# Softmax activation: y is the predicted probability of each of the 10 classes.
with tf.name_scope("softmax"):
    y = tf.nn.softmax(tf.matmul(x, W) + b)

# Cost function: -sum(y_ * log(y)) over the class axis gives the
# cross-entropy of each example; reduce_mean averages it over the batch.
with tf.name_scope('cross_entropy'):
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

# Optimizer
with tf.name_scope('train'):
    train_optimizer = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(cross_entropy)

# Accuracy: fraction of examples whose arg-max prediction equals the label.
with tf.name_scope('Accuracy'):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

tf.summary.scalar("cost", cross_entropy)
tf.summary.scalar("accuracy", accuracy)

# Merge all summaries into a single "operation" which will be executed in a session
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    # Initialize variables before using them.
    sess.run(tf.global_variables_initializer())

    # Log writer object for TensorBoard.
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    # Training cycles
    for epoch in range(training_epochs):
        # Number of batches in one epoch
        batch_count = int(mnist.train.num_examples / batch_size)
        for i in range(batch_count):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, summary = sess.run([train_optimizer, summary_op],
                                  feed_dict={x: batch_x, y_: batch_y})
            writer.add_summary(summary, epoch * batch_count + i)
        if epoch % 5 == 0:
            print("Epoch: ", epoch)

    # accuracy.eval() needs the default session, so it stays inside the block.
    print("Accuracy: ", accuracy.eval(
        feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
    print("Done")

    # Flush pending summary events to disk.
    writer.close()

After you trained the network, you can get the label that the network gives to a new image by doing
# y holds the softmax probabilities of the 10 classes for each image, so take
# the arg-max over the class axis to turn them into one digit label per image.
new_image_label = sess.run(tf.argmax(y, 1), feed_dict={x: new_image})
Note that the format of new_image should be the same as of batch_x. Think about new_image as a batch of size 1, so if batch_x is 2D, new_image should also be 2D (of shape 1 by 784).
In addition, if you did some pre-processing (like normalization for example) to the images in batch_x, you need to do the same thing with new_image.
You could also get the labels of several images simultaneously with the same code as above. Just replace new_image with some 2D array of several images new_images.

Related

How to switch from GradientDescent Optimizer to Adam in Tensorflow

My code is running perfectly with Gradient Descent, but I want to compare the effectiveness of my algorithm using Adam Optimizer, so I tried to modify the following code:
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

import tensorflow as tf

# Set parameters
learning_rate = 0.01
training_iteration = 30
batch_size = 100
display_step = 2

# TF graph input
x = tf.placeholder("float", [None, 784])  # mnist data image of shape 28*28=784
y = tf.placeholder("float", [None, 10])  # 0-9 digits recognition => 10 classes

# Create a model
# Set model weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

with tf.name_scope("Wx_b") as scope:
    # Construct a linear model
    model = tf.nn.softmax(tf.matmul(x, W) + b)  # Softmax

# Add summary ops to collect data
w_h = tf.summary.histogram("weights", W)
b_h = tf.summary.histogram("biases", b)

# More name scopes will clean up graph representation
with tf.name_scope("cost_function") as scope:
    # Minimize error using cross entropy, summed over the whole batch.
    # NOTE: tf.log(model) is -inf wherever a softmax output reaches exactly 0,
    # and 0 * -inf is NaN, so this expression is not numerically safe.
    cost_function = -tf.reduce_sum(y * tf.log(model))
    # Create a summary to monitor the cost function
    tf.summary.scalar("cost_function", cost_function)

with tf.name_scope("train") as scope:
    # Gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

# Initializing the variables
init = tf.global_variables_initializer()

# Merge all summaries into a single operator
merged_summary_op = tf.summary.merge_all()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter('/home/raed/Tensorflow/tensorflow_demo', graph_def=sess.graph_def)

    # Training cycle
    for iteration in range(training_iteration):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute the average loss
            avg_cost += sess.run(cost_function, feed_dict={x: batch_xs, y: batch_ys}) / total_batch
            # Write logs for each iteration
            summary_str = sess.run(merged_summary_op, feed_dict={x: batch_xs, y: batch_ys})
            summary_writer.add_summary(summary_str, iteration * total_batch + i)
        # Display logs per iteration step
        if iteration % display_step == 0:
            print("Iteration:" "%04d" % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Tuning completed!")

    # Test the model
    predictions = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(predictions, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Flush pending summary events to disk.
    summary_writer.close()
to use Adam Optimizer I tried to change the following line :
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
and replace it with the AdamOptimizer :
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost_function)
when I ran the code , I got few iteration and then it stopped with the following error.
InvalidArgumentError (see above for traceback): Nan in summary histogram for: weights
[[Node: weights = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](weights/tag, Variable/read)]]
Could you please help me understand the problem? Thanks in advance.
The problem is that the weights are initialized to zero, W = tf.Variable(tf.zeros([784, 10])); that's why you get NaN as weights.
You need to initialize them with some initializer, e.g. a normal distribution, as follows:
W = tf.Variable(tf.random_normal([784, 10], stddev=0.35),
name="weights")

tensorflow - linear regression does not give intended computational graph

I am trying to train a very simple linear regression with tensorflow but the loss doesn't decrease and the tensorboard also doesn't look right
import numpy as np
import tensorflow as tf

### Generate data
w_true = np.array([1.0, 2.0])
b_true = 0.5
x_train = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
x_test = np.random.multivariate_normal(mean=[0, 0], cov=[[3, 0], [0, 3]], size=100)
y_train = np.dot(x_train, w_true) + b_true
y_test = np.dot(x_test, w_true) + b_true

### Placeholders for data input
x = tf.placeholder(dtype=tf.float32, shape=[None, 2], name="x")
y = tf.placeholder(dtype=tf.float32, shape=[None], name="labels")

### Trainable parameters
w = tf.Variable(initial_value=np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]]),
                dtype=tf.float32, name="W")
b = tf.Variable(initial_value=np.random.normal(1), dtype=tf.float32, name="B")

### Computational graph
y_pred = tf.tensordot(x, w, 1) + b
tf.summary.histogram("weights", w)
tf.summary.histogram("bias", b)
loss = tf.reduce_sum(tf.squared_difference(y, y_pred), name="loss")
tf.summary.scalar("loss", loss)
with tf.name_scope("train"):
    train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)

### Training
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# For TensorBoard
writer = tf.summary.FileWriter("path_to_some_folder")
writer.add_graph(sess.graph)

for t in range(1000):
    # NOTE: the two np.random.choice calls draw independent index sets, so the
    # rows of x_batch and the entries of y_batch do not correspond.
    x_batch = x_train[np.random.choice(100, 20)]
    y_batch = y_train[np.random.choice(100, 20)]
    sess.run(train_step, {x: x_batch, y: y_batch})

print(sess.run(loss, {x: x_train, y: y_train}))
print(sess.run(loss, {x: x_test, y: y_test}))
I have tried different step sizes, but the error always stays above 400 on the training set and above 1000 on the test set. I have tested that tf.tensordot() behaves as I expect. If you would like to see the TensorBoard output, just replace path_to_some_folder and, after training, run tensorboard --logdir path_to_some_folder
Thanks very much for the help
Your problem is because of the following two lines,
x_batch = x_train[np.random.choice(100, 20)]
y_batch = y_train[np.random.choice(100, 20)]
In each iteration, np.random.choice(100, 20) returns two different index lists for x_batch and y_batch. Therefore, your x_batch and y_batch will never match. Instead, replace that part with the following code.
BATCH_SIZE = 10
N_COUNT = len(x_train)
for t in range(1000):
    # Walk the training set in consecutive slices; the same start/end pair is
    # used for x and y so inputs and targets stay aligned.
    for start, end in zip(range(0, N_COUNT, BATCH_SIZE),
                          range(BATCH_SIZE, N_COUNT + 1, BATCH_SIZE)):
        x_batch = x_train[start:end]
        y_batch = y_train[start:end]
        sess.run(train_step, {x: x_batch, y: y_batch})
Hope this helps.

SVM on MNIST data with PCA using tensorflow

I intended to learn about PCA using SVD and therefore implemented it and tried to use it on MNIST data.
import numpy as np
class PCA(object):
    """Principal component analysis of a data matrix, computed with an SVD.

    The input is standardized column-wise (zero mean, unit standard
    deviation) before the decomposition ``U S V' = svd(X_std)``.

    Attributes:
        N, dim: first two entries of ``X.shape``.
        X: the data exactly as passed in (not standardized).
        U, s, Vt: factors returned by ``np.linalg.svd`` (reduced form).
        V: ``Vt.T``; its columns are the principal directions.
        variance_ratio: squared singular values, which are proportional to
            the variance captured by each component.
    """

    def __init__(self, X):
        """Standardize ``X`` and decompose it.

        Args:
            X: array whose first axis indexes samples and whose second axis
                indexes features.
        """
        self.N, self.dim, *_ = X.shape
        self.X = X
        # The small constant avoids division by zero for constant columns.
        X_std = (X - np.mean(X, axis=0)) / (np.std(X, axis=0) + 1e-13)
        # The reduced SVD avoids building an N x N matrix U; only the first
        # min(N, dim) singular vectors are kept.
        self.U, self.s, self.Vt = np.linalg.svd(X_std, full_matrices=False)
        self.V = self.Vt.T
        # Variance along a component scales with the square of its singular
        # value, not with the singular value itself.
        self.variance_ratio = self.s ** 2

    def variance_explained_ratio(self):
        """Return the cumulative fraction of variance captured with each
        added principal component."""
        return np.cumsum(self.variance_ratio) / np.sum(self.variance_ratio)

    def X_projected(self, r=None):
        """Return the stored data ``X`` projected on the first ``r``
        principal directions.

        Args:
            r: number of components to keep; ``None`` keeps ``self.dim``.

        Returns:
            The product of ``self.X`` with the first ``r`` columns of
            ``self.V``. The raw ``X`` is used, not the standardized copy.
        """
        if r is None:
            r = self.dim
        return self.X.dot(self.V[:, :r])
Now with this implementation for PCA, I tried to apply it to MNIST data to see the performance with and without PCA for classification using softmax. The code for that is as follows:
import random
import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Using first 10000 images
train_data = mnist.train.images[:10000, :]
train_labels = mnist.train.labels[:10000, :]

pca1 = PCA(train_data)
# NOTE: this fits a second, independent PCA on the test images, so the test
# projection uses different components than the training projection.
pca_test = PCA(mnist.test.images)

n_components = 14
X_proj1 = pca1.X_projected(r=n_components)
X_projTest = pca_test.X_projected(r=n_components)

t1 = time.time()

# Softmax classifier on the projected features.
x = tf.placeholder(tf.float32, [None, n_components])
W = tf.Variable(tf.zeros([n_components, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.cast(tf.nn.softmax(tf.matmul(x, W) + b), tf.float32)
y_ = tf.placeholder(tf.float32, [None, 10])

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y),
                                              reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.7).minimize(cross_entropy)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

m = 10000
for _ in range(1000):
    # Draw 100 distinct training rows; the same indices select the features
    # and the labels.
    indices = random.sample(range(0, m), 100)
    batch_xs = X_proj1[indices]
    batch_ys = train_labels[indices]
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracy = sess.run(accuracy, feed_dict={x: X_projTest,
                                         y_: mnist.test.labels})
print("Accuracy: %f" % accuracy)
sess.close()

t2 = time.time()
print("Total time taken: %f seconds" % (t2 - t1))
The accuracy I obtain using this is only around 19% whereas with the train_data and train_labels, the accuracy is more than 90%. Could someone suggest where I'm going wrong?
When we use PCA or feature scaling, we set the underlying parameters on the training dataset and then just apply/transform it on the test dataset. The test dataset is not used to calculate the key parameters, or in this case, SVD should only be applied on the train dataset.
e.g. in sklearn's PCA, we use the following code :
from sklearn.decomposition import PCA
pca = PCA(n_components = 'whatever number you want')
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
Note, that we fit on the training dataset, X_train and transform on X_test.
Similarly, for the above implementation, there's no need to create the pca_test object. Tweak the X_projTest variable to :
X_projTest = mnist.test.images.dot(pca1.V[:,0:n_components])
This should solve for the low test accuracy.

Tensorflow does not train CIFAR - 100 data

I am trying to build a linear classifier for CIFAR-100 using TensorFlow. I took the code from Martin Gorner's MNIST tutorial and changed it a bit. When I run this code, TensorFlow does not train: the code runs, but the accuracy remains at 1.0 and the loss (cross entropy) remains at 4605.17. I don't know what is wrong. I am a newbie to TF, so any help is appreciated.
import pickle
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.framework import tensor_util
import math
#imports data
def unpickle(file):
    """Open the file at path ``file`` in binary mode and return the object
    pickled in it, decoding Python 2 byte strings as ``bytes``."""
    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')
# Placeholders; the first three are reassigned from the pickled files below.
cifar100_test = {}
cifar100_train = {}
labelMap = {}
labelNames = {}
# Load the raw CIFAR-100 data.
cifar100_test = unpickle('dataset/cifar-100-python/test')
cifar100_train = unpickle('dataset/cifar-100-python/train')
labelMap = unpickle('dataset/cifar-100-python/meta')
# tr = training data, te = testing data; X is data, Y is label
Xtr = cifar100_train[b'data']
Yr = cifar100_train[b'fine_labels']
Xte = cifar100_test[b'data']
Ye = cifar100_test[b'fine_labels']
classNames = labelMap[b'fine_label_names']
num_train = Xtr.shape[0]
num_test = Xte.shape[0]
num_class = len(classNames)
# Label matrices with one row per example and one column per class.
Ytr = np.zeros([num_train, num_class])
Yte = np.zeros([num_test, num_class])
# NOTE: the column index here is the whole label sequence, so every row gets
# a 1 in every label column that occurs, instead of a single 1 per row. This
# is the mistake that the follow-up at the end of this post corrects.
Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1
# As a sanity check, we print out the size of the training and test data.
print('Train data shape:', Xtr.shape)
print('Train Label shape:', Ytr.shape)
print('Test data shape:', Xte.shape)
print('Test Label shape:', Yte.shape)
print('Name of Predicted Class:', classNames[0]) # the index of a label name is the index of its class
# Optional sub-sampling is left commented out; the full sets are used.
Xtrain = Xtr#[:1000]
Xtest = Xte#[:100]
Ytrain = Ytr#[:1000]
Ytest = Yte#[:100]
print('Train data shape:', Xtrain.shape)
print('Train Label shape:', Ytrain.shape)
print('Test data shape:', Xtest.shape)
print('Test Label shape:', Ytest.shape)
# Reshape each row to 32 x 32 x 3; transpose(0,1,2,3) keeps the axis order
# unchanged.
Xtrain = np.reshape(Xtrain,(50000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
Xtest = np.reshape(Xtest,(10000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
# Split into equally sized mini-batches.
Xbatches = np.split(Xtrain, 500); # second argument is the number of batches
Ybatches = np.split(np.asarray(Ytrain), 500);
XtestB = np.split(Xtest, 100);
YtestB = np.split(Ytest, 100);
print('X # of batches:', len(Xbatches))
print('Y # of batches:', len(Ybatches))
# input X: a mini-batch of 100 images of shape 32x32x3; the first dimension indexes the images in the mini-batch
X = tf.placeholder(tf.float32, [100, 32, 32, 3])
# correct answers will go here: one row of 100 class indicators per image
Y_ = tf.placeholder(tf.float32, [100, 100])
# weights W[3072, 100] 3072=32*32*3
W = tf.Variable(tf.zeros([3072, 100]))
# biases b[100]
b = tf.Variable(tf.zeros([100]))
# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 3072])
# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)
# loss function: cross-entropy = - sum( Y_i * log(Yi) )
# Y: the computed output vector
# Y_: the desired output vector
# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean averages all the components in the tensor
# Y_ * log(Y) has 100 x 100 elements here, so the mean divides their sum by 10000
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0 # scaling by 1000 leaves the batch total divided by 10
# (the factor 1000 comes from the 10-class MNIST tutorial this code was adapted from)
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# training, learning rate = 0.01
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# One pass over the 500 training mini-batches.
for i in range(500):
    # the backpropagation training step
    t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
    print(Loss)
    print(i)

# Report the accuracy on each of the 100 test mini-batches.
for i in range(100):
    print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i], Y_: YtestB[i]}))
You compute the accuracy a hundred times after the training process is completed. Nothing will change there. You should place your print('accuracy:'....) within the for loop in which you perform the backpropagation:
for i in range(500):
    # the backpropagation training step
    t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
    print(Loss)
    print(i)
    # There are fewer test batches than training batches, so wrap the index
    # around instead of running past the end of the test lists.
    j = i % len(XtestB)
    print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[j], Y_: YtestB[j]}))
Sorry for the post it turns out that it is a basic mistake.
I changed following;
Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1
with
Ytr[range(num_train), Yr_temp[range(num_train)]] = 1
Yte[range(num_test), Ye_temp[range(num_test)]] = 1
The first version sets all values to 1, but I only wanted to set the index of the true class to 1 and leave the other elements at 0. Thanks for your time.

Add a summary of accuracy of the whole train/test dataset in Tensorflow

I am trying to use Tensorboard to visualize my training procedure. My purpose is, when every epoch completed, I would like to test the network's accuracy using the whole validation dataset, and store this accuracy result into a summary file, so that I can visualize it in Tensorboard.
I know TensorFlow has summary_op to do this; however, it seems to work for only one batch when running the code sess.run(summary_op). I need to calculate the accuracy for the whole dataset. How?
Is there any example to do it?
Define a tf.scalar_summary that accepts a placeholder:
accuracy_value_ = tf.placeholder(tf.float32, shape=())
accuracy_summary = tf.scalar_summary('accuracy', accuracy_value_)
Then calculate the accuracy for the whole dataset (define a routine that calculates the accuracy for every batch in the dataset and extract the mean value) and save it into a python variable, let's call it va.
Once you have the value of va, just run the accuracy_summary op, feeding the accuracy_value_ placeholder:
sess.run(accuracy_summary, feed_dict={accuracy_value_: va})
I implement a naive one-layer model as an example to classify MNIST dataset and visualize validation accuracy in Tensorboard, it works for me.
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import os

# number of epoch
num_epoch = 1000
model_dir = '/tmp/tf/onelayer_model/accu_info'
# mnist dataset location, change if you need
data_dir = '../data/mnist'

# load MNIST dataset without one hot
dataset = read_data_sets(data_dir, one_hot=False)

# Create placeholder for input images X and labels y
X = tf.placeholder(tf.float32, [None, 784])
# one_hot = False, so labels are fed as integer class ids
y = tf.placeholder(tf.int32)

# One layer model graph
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[10]))
logits = tf.nn.relu(tf.matmul(X, W) + b)
init = tf.initialize_all_variables()
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)

# loss function
loss = tf.reduce_mean(cross_entropy)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# Index of the largest logit per example, flattened to one value per row.
_, top_1_op = tf.nn.top_k(logits)
top_1 = tf.reshape(top_1_op, shape=[-1])
correct_classification = tf.cast(tf.equal(top_1, y), tf.float32)

# accuracy function
acc = tf.reduce_mean(correct_classification)

# define info that is used in SummaryWriter
acc_summary = tf.scalar_summary('valid_accuracy', acc)
valid_summary_op = tf.merge_summary([acc_summary])

with tf.Session() as sess:
    # initialize all the variable
    sess.run(init)

    print("Writing Summaries to %s" % model_dir)
    train_summary_writer = tf.train.SummaryWriter(model_dir, sess.graph)

    # load validation dataset
    valid_x = dataset.validation.images
    valid_y = dataset.validation.labels

    for epoch in range(num_epoch):
        batch_x, batch_y = dataset.train.next_batch(100)
        feed_dict = {X: batch_x, y: batch_y}
        _, acc_value, loss_value = sess.run(
            [train_op, acc, loss], feed_dict=feed_dict)
        # Evaluate the accuracy summary on the whole validation set in one run.
        vsummary = sess.run(valid_summary_op,
                            feed_dict={X: valid_x,
                                       y: valid_y})
        # Write validation accuracy summary
        train_summary_writer.add_summary(vsummary, epoch)
Using batching with your validation set is possible in case you are using tf.metrics ops, which use internal counters. Here is a simplified example:
model = create_model()
tf.summary.scalar('cost', model.cost_op)
acc_value_op, acc_update_op = tf.metrics.accuracy(labels, predictions)
# merge_all() is called before the accuracy scalar is created, so the common
# summary does not include it.
summary_common = tf.summary.merge_all()
summary_valid = tf.summary.merge([
    tf.summary.scalar('accuracy', acc_value_op),
    # other metrics here...
])
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(logs_path + '/train',
                                         sess.graph)
    valid_writer = tf.summary.FileWriter(logs_path + '/valid')
While training, only write the common summary using your train-writer:
summary = sess.run(summary_common)
train_writer.add_summary(summary, tf.train.global_step(sess, gstep_op))
train_writer.flush()
After every validation, write both summaries using the valid-writer:
gstep, summaryc, summaryv = sess.run([gstep_op, summary_common, summary_valid])
valid_writer.add_summary(summaryc, gstep)
valid_writer.add_summary(summaryv, gstep)
valid_writer.flush()
When using tf.metrics, don't forget to reset the internal counters (local variables) before every validation step.