I wrote this code (most of it comes from a tutorial; I only modified a few lines),
and it is not working.
from mxnet import gluon
from mxnet.gluon import nn
ctx = mx.gpu()
def data_xform(data):
    """Move channel axis to the beginning, cast to float32, and normalize to [0, 1].

    Axis 2 of ``data`` is moved to position 0, the result is cast to
    ``float32`` and divided by 255.

    NOTE(review): the [0, 1] range only holds if the input values lie in
    [0, 255] (e.g. 8-bit images in height x width x channel layout) --
    confirm against the dataset this transform is attached to.
    """
    return nd.moveaxis(data, 2, 0).astype('float32') / 255
# prepare data
train_data =
val_data =
batch_size = 100
train_loader =, shuffle=True, batch_size=batch_size)
val_loader =, shuffle=False, batch_size=batch_size)
# create network
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
net= gluon.SymbolBlock(outputs=[fc3], inputs=[data])
# create trainer, metric
trainer = gluon.Trainer(
optimizer_params={'learning_rate': 0.1, 'momentum':0.9, 'wd':0.00001},
metric = mx.metric.Accuracy()
# learn
num_epochs = 10
for epoch in range(num_epochs):
for inputs, labels in train_loader:
inputs = inputs.as_in_context(ctx)
labels = labels.as_in_context(ctx)
with autograd.record():
outputs = net(inputs)
# softmax
exps = nd.exp(outputs - outputs.min(axis=1).reshape((-1,1)))
exps = exps / exps.sum(axis=1).reshape((-1,1))
# cross entropy
loss = nd.MakeLoss(-nd.log(exps.pick(labels)))
#loss = gluon.loss.SoftmaxCrossEntropyLoss()(outputs, labels)
metric.update(labels, outputs)
name, acc = metric.get()
print('After epoch {}: {} = {}'.format(epoch + 1, name, acc))
If I use gluon.loss.SoftmaxCrossEntropyLoss, this runs well.
When I print the loss in both cases, the output values look the same.
What is the difference?
Thank you in advance.

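I am not entirely sure why you subtract outputs.min() when calculating the softmax. The original softmax function doesn't do anything like that (the numerically stable variant subtracts the row maximum, not the minimum, so that every exponent is at most 0 and exp cannot overflow). If you don't subtract the minimum, you will get a good accuracy value: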
# softmax
exps = nd.exp(outputs)
exps = exps / exps.sum(axis=1).reshape((-1, 1))
# cross entropy
loss = nd.MakeLoss(-nd.log(exps.pick(labels)))
I get:
After epoch 1: accuracy = 0.89545
After epoch 2: accuracy = 0.9639
After epoch 3: accuracy = 0.97395
After epoch 4: accuracy = 0.9784
After epoch 5: accuracy = 0.98315


Network bug - Inception v1 isn't training

I am trying to use the Inception model (GoogLeNet) from this link, which is implemented by Google using the TensorFlow-Slim API, to classify images from the CIFAR-10 dataset (dataset link). The problem is that the network cost stays almost constant and I can't find the bug. I am very new to TensorFlow and Slim, so I would really appreciate any help.
I am using these packages:
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import ops
import matplotlib.pyplot as plt
import os
import pickle
import cv2
from sklearn import model_selection as ms
from nets import inception_v1,inception_utils
import math
%matplotlib inline
And I made these two functions:
def one_hot_matrix(labels, C):
    """Return the one-hot encoding of ``labels`` as an evaluated array.

    Args:
        labels: class indices to encode.
        C: number of classes (the depth of the one-hot dimension).

    Returns:
        The evaluated result of ``tf.one_hot(labels, C, axis=0)``. Because
        ``axis=0`` is used, the class dimension comes first, which is why
        the caller transposes the result.
    """
    C = tf.constant(C, name='C')
    one_hot_op = tf.one_hot(labels, C, axis=0)
    # Evaluate the graph op eagerly; the context manager releases the
    # session's resources even if evaluation raises.
    with tf.Session() as sess:
        one_hot = sess.run(one_hot_op)
    return one_hot
def make_mini_batches(X, Y, mini_batch_size):
    """Split ``X`` and ``Y`` into consecutive mini-batches along axis 0.

    Args:
        X: array whose first axis indexes examples.
        Y: 2-D array with one row per example, aligned with ``X``.
        mini_batch_size: number of examples per mini-batch.

    Returns:
        A list of ``(mini_batch_X, mini_batch_Y)`` tuples in dataset order.
        Every batch has ``mini_batch_size`` examples except possibly the
        last one, which holds the remainder when the dataset size is not a
        multiple of ``mini_batch_size``.
    """
    m = X.shape[0]
    mini_batches = []
    # Stepping by the batch size covers the complete batches and the final
    # partial batch alike: slicing past the end is clipped to the data.
    for start in range(0, m, mini_batch_size):
        end = start + mini_batch_size
        mini_batches.append((X[start:end, ...], Y[start:end, :]))
    return mini_batches
First, I am reading the dataset:
# function to read the batches
def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
    """Read one pickled batch file and return its images and labels.

    Args:
        cifar10_dataset_folder_path: folder containing the
            ``data_batch_<id>`` files.
        batch_id: id appended to ``data_batch_`` to form the file name.

    Returns:
        A dict with key ``'data'`` holding the images reshaped from flat
        rows to ``(N, 3, 32, 32)`` and transposed to ``(N, 32, 32, 3)``,
        and key ``'labels'`` holding the batch's labels unchanged.
    """
    batch_path = cifar10_dataset_folder_path + '/data_batch_' + str(batch_id)
    with open(batch_path, mode='rb') as file:
        # note the encoding type is 'latin1'
        # NOTE: unpickling can execute arbitrary code -- only load batch
        # files obtained from a trusted source.
        batch = pickle.load(file, encoding='latin1')
    raw = batch['data']
    # Each row is channel-major; move the channel axis last.
    features = raw.reshape((len(raw), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']
    return {'data': features, 'labels': labels}
# combine batches into one dataset (batch size: 10000)
full_data = load_cfar10_batch('./cifar_10',1)['data']
full_labels = []
for i in range(5):
if i > 0:
full_data = np.concatenate((full_data,load_cfar10_batch('./cifar_10',i+1)['data']),axis = 0)
# dataset sizes
full_data.shape, len(full_labels)
Followed by some preprocessing and train/validation split:
# data preprocessing (using only 1/10 of the dataset for speed)
X = full_data[0:5000]
y = one_hot_matrix(full_labels[0:5000], 10).T
# split into training-validation sets
x_train, x_val, y_train, y_val = ms.train_test_split(X, y, test_size=0.2, random_state=1)
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_train = x_train / 255.0
x_val = x_val / 255.0
print('x_train shape:',x_train.shape)
print('y_train shape:',y_train.shape)
print('x_val shape:',x_val.shape)
print('y_val shape:',y_val.shape)
Then I initialize the variables:
seed = 3
(m, n_H, n_W, n_C) = x_train.shape
n_y = y_train.shape[1]
costs = []
print_cost = True
learning_rate = 0.001
num_epochs = 100
minibatch_size = 256
num_minibatches = int(m / minibatch_size)
minibatches = make_mini_batches(x_train, y_train, minibatch_size)
inputs = tf.placeholder(tf.float32,shape=[None, n_H, n_W, n_C],name = 'inputs')
labels = tf.placeholder(tf.int8,shape=[None, n_y],name = 'labels')
# Forward propagation (Inception)
Z = inception_v1.inception_v1(inputs,num_classes = n_y,dropout_keep_prob=1,global_pool=True)[1]['Logits']
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = Z, labels = labels))
# ADAM optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# Initialize variables
init = tf.global_variables_initializer()
And then the training loop:
with tf.Session() as sess:
for epoch in range(num_epochs):
# learning rate decay
if epoch % 8 == 0:
learning_rate *= math.pow(0.95,epoch/8)
minibatch_cost = 0.
for minibatch in minibatches:
(minibatch_X, minibatch_Y) = minibatch
_ , temp_cost =[optimizer, cost], feed_dict={inputs: minibatch_X, labels: minibatch_Y})
minibatch_cost += temp_cost / num_minibatches
# Print the cost every epoch
if print_cost == True and epoch % 5 == 0:
print ("Cost after epoch %i: %f" % (epoch, minibatch_cost),", Learning rate: %f" %(learning_rate))
if print_cost == True and epoch % 1 == 0:
# Plot the cost
plt.xlabel('Iterations (per tens)')
plt.title("Learning rate =" + str(learning_rate))
# Calculate the correct predictions
predict_op = tf.argmax(Z, 1)
correct_prediction = tf.equal(predict_op, tf.argmax(labels, 1))
# Calculate accuracy on the validation set
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
train_accuracy = accuracy.eval({inputs: x_train, labels: y_train})
val_accuracy = accuracy.eval({inputs: x_val, labels: y_val})
print("Train Accuracy:", train_accuracy)
print("Validation Accuracy:", val_accuracy)
The output looks like this:
Cost after epoch 0: 2.455999 , Learning rate: 0.001000
Cost after epoch 5: 2.454697 , Learning rate: 0.001000
Cost after epoch 10: 2.454670 , Learning rate: 0.000950
Cost after epoch 15: 2.454655 , Learning rate: 0.000950
Cost after epoch 20: 2.454650 , Learning rate: 0.000857
Cost after epoch 25: 2.454649 , Learning rate: 0.000735
Cost after epoch 30: 2.454659 , Learning rate: 0.000735
Cost after epoch 35: 2.454643 , Learning rate: 0.000599
Cost after epoch 40: 2.454627 , Learning rate: 0.000463
And so my network is not training.
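I managed to find the solution. I had to enter the Inception argument scope before building the model (the arg scope supplies the default layer settings, such as batch normalization, that the model is meant to be built with), something like this: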
with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
Z = inception_v1.inception_v1(inputs,num_classes = n_y,dropout_keep_prob=1,global_pool=True)[1]['Logits']
After that, everything works just fine.

How to fix the fetch argument error when implementing a Bayesian neural network with TensorFlow

placeholder_X = tf.placeholder(tf.float32, shape = [None, 19])
placeholder_y = tf.placeholder(tf.float32, shape = [None,1])
#Build an iterator over training batches
#training_dataset =, y_train))
training_dataset =, placeholder_y))
#Shuffle the dataset (note shuffle argument much larger than training size).learning_rate # shuffling of data
# and form batches of size batch_size
training_batches = training_dataset.shuffle(20000, reshuffle_each_iteration =True).repeat().batch(FLAGS.batch_size)
#training_iterator =
#Building iterator over the heldout set with batch_size = heldout_size,
# i.e., return the entire heldout set as a constant.
val_dataset =, placeholder_y))
val_batches = val_dataset.repeat().batch(500)
#heldout_iterator =
test_dataset =,y_test))
test_dataset = test_dataset.batch(500)
#Combine these into a feasible iterator that can switch between training
# and validation inputs.
# Here should be minibatch increment be defined
handle = tf.placeholder(tf.string, shape = [])
feedable_iterator =, training_batches.output_types, training_batches.output_shapes)
features_final, labels_final = feedable_iterator.get_next()
#create Reinitializable iterator for Train and Validation, one hot iterator for Test
train_val_iterator =, training_batches.output_shapes)
training_iterator = train_val_iterator.make_initializer(training_batches)
val_iterator = train_val_iterator.make_initializer(val_batches)
test_iterator = test_dataset.make_one_shot_iterator()
def main(argv):
# extract the activation function from the hyperopt spec as an attribute from the tf.nn module
#activation = getattr(tf.nn, FLAGS.activation_function)
# define the graph
#with tf.Graph().as_default():
# Build the Bayesian neural network.
# We use the Gaussian reparameterization trick
# to compute the stochastic gradients as described in the paper
with tf.compat.v1.name_scope("bayesian_neural_net", values =[features_final]):
neural_net = tf.keras.Sequential()
for i in range(FLAGS.num_hidden_layers):
layer = tfp.layers.DenseReparameterization(
units = 10,
activation = tf.nn.relu,
trainable = True,
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag
#kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(), # softplus(sigma)
kernel_posterior_tensor_fn=lambda x: x.sample(),
bias_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
bias_posterior_tensor_fn=lambda x: x.sample()
units=2, # one dimensional output
activation= tf.nn.softmax, # since regression (outcome not bounded)
trainable=True, # i.e subject to optimization
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn, # NormalDiag with hyperopt sigma
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
kernel_posterior_tensor_fn=lambda x: x.sample(),
bias_prior_fn =tfp.layers.default_multivariate_normal_fn, # NormalDiag with hyperopt sigma
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(), # softplus(sigma)
bias_posterior_tensor_fn=lambda x: x.sample()
logits = neural_net(features_final)
#labels_distribution = tfd.Bernoulli(logits=logits)
labels_distribution = tfd.Categorical(logits=logits)
#labels_distribution = tfd.Bernoulli(logits=logits)
# Perform KL annealing. The optimal number of annealing steps
# depends on the dataset and architecture.
t = tf.Variable(0.0)
kl_regularizer = t / (FLAGS.kl_annealing * len(X_train) / FLAGS.batch_size)
# Compute the -ELBO as the loss. The KL term is annealed from 0 to 1 over
# the epochs specified by the kl_annealing flag.
log_likelihood = labels_distribution.log_prob(labels_final)
#neg_log_likelihood = tf.reduce_mean(tf.squared_difference(logits,labels_final))
neg_log_likelihood = -tf.reduce_mean(input_tensor = log_likelihood)
kl = sum(neural_net.losses)/len(X_train) * tf.minimum(1.0, kl_regularizer)
elbo_loss = neg_log_likelihood + kl
# Build metrics for evaluation. Predictions are formed from single forward
# pass of the probablisitic layers . They are cheap but noisy predictions
predictions = tf.argmax(input = logits, axis=1)
predictions = tf.cast(predictions, tf.float32)
# TP, TN, FP, FN
TP = tf.count_nonzero(predictions * labels_final)
TN = tf.count_nonzero((predictions - 1) * (labels_final - 1))
FP = tf.count_nonzero(predictions * (labels_final - 1))
FN = tf.count_nonzero((predictions - 1) * labels_final)
# precision, recall, f1
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * precision * recall / (precision + recall)
tpr = TP/(TP+FN)
fpr = FP/(TP+FN)
#create Reinitializable iterator for Train and Validation, one hot iterator for Test
train_val_iterator =, training_batches.output_shapes)
training_iterator = train_val_iterator.make_initializer(training_batches)
val_iterator = train_val_iterator.make_initializer(val_batches)
test_iterator = test_dataset.make_one_shot_iterator()
with tf.compat.v1.name_scope("train"):
train_accuracy, train_accuracy_update_op = tf.metrics.accuracy(labels=labels_final,predictions =predictions)
opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
train_op = opt.minimize(elbo_loss)
update_step_op = tf.assign(t, t+1)
with tf.compat.v1.name_scope("valid"):
valid_accuracy, validation_accuracy_update_op = tf.metrics.accuracy(labels= labels_final,predictions = predictions)
with tf.compat.v1.name_scope("test"):
test_accuracy, test_accuracy_update_op = tf.metrics.accuracy(labels = labels_final,predictions = predictions)
init_op =,
saver = tf.train.Saver()
stream_vars_valid = [ v for v in tf.local_variables() if "valid" in]
reset_valid_op = tf.variables_initializer(stream_vars_valid)
valid_accuracy_summary = []
stop_early =0
with tf.compat.v1.Session() as sess:
# Run the training loop
train_val_string, test_string =[
training_steps = int(round(FLAGS.epochs * (len(X_train) / FLAGS.batch_size)))
for step in range(training_steps):
#start reininitializable's train iterator, feed_dict = {placeholder_X:X_train, placeholder_y:y_train})
_ =[train_op,train_accuracy_update_op, update_step_op],feed_dict={handle: train_val_string})
# Manually print the frequency
if step % 100 == 0:
save_path =, "/tmp/my_model.ckpt")
loss_value, accuracy_value, kl_value =[elbo_loss, train_accuracy, kl], feed_dict= {handle: train_val_string})
print("Step:{:>3d} loss : {:.3f} KL: {:.3f}" .format(step , loss_value, accuracy_value, kl_value))
if (step +1) % FLAGS.eval_freq ==0:
# Compute log prob of heldout set by averaging draws from the model:
# p(heldout | train) = int_model p(heldout|model) p(model|train) ~= 1/n * sum_{i=1}^n p(heldout | model_i)
# where model_i is a draw from the posterior
probs = np.asarray([,
feed_dict ={handle: train_val_string})
for _ in range(FLAGS.num_monte_carlo)])
mean_probs = np.mean(probs, axis =0).astype(np.int32)
_, label_vals =, labels_final), feed_dict = {handle: train_val_string})
label_vals = (label_vals).astype(np.int32)
heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]), label_vals]))
print(" ...Held_out nats: {:.3f}".format(heldout_lp))
# Calculate validation accuracy
for step in range(10):
#start reinitializable's validation iterator, feed_dict = {placeholder_X:X_val, placeholder_y:y_val}), feed_dict={handle:train_val_string})
valid_value =, feed_dict={handle:train_val_string})
if valid_value < max(valid_accuracy_summary) and step > 100:
stop_early += 1
if stop_early == 40:
stop_early = 0
print("Validation Accuracy: {:.3f}".format(valid_value))
#Feed to r=feedable iterator the string handle
test_value, precision_value, recall_value, fpr_value, tpr_value,f1 =[test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
print("Step: {:>3d} test Accuracy: {:.3f} Precision: {:.3f} Recall: {:.3f} ".format(step, test_value, precision_value, recall_value))
print("Step: {:>3d} fpr: {:.3f} tpr: {:.3f} f1_1: {:.3f}".format( step, fpr_value, tpr_value,f1))
if __name__ == "__main__":
I expect the training to keep progressing, but it gives this error:
Step: 0 loss : 0.646 KL: 0.875
Step:100 loss : 0.654 KL: 0.904
Step:200 loss : 0.657 KL: 0.906
Step:300 loss : 0.648 KL: 0.906
/usr/local/lib/python3.6/dist-packages/ RuntimeWarning: divide by zero encountered in log
...Held_out nats: -inf
Validation Accuracy: 0.914
Step: 9 test Accuracy: 0.000 Precision: 0.910 Recall: 1.000
Step: 9 fpr: 0.099 tpr: 1.000 f1_1: 0.953
Step:400 loss : 0.624 KL: 0.906
Step:500 loss : 0.641 KL: 0.906
Step:600 loss : 0.612 KL: 0.906
Step:700 loss : 0.579 KL: 0.906
...Held_out nats: -inf
Validation Accuracy: 0.914
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/ in __init__(self, fetches, contraction_fn)
302 self._unique_fetches.append(ops.get_default_graph().as_graph_element(
--> 303 fetch, allow_tensor=True, allow_operation=True))
304 except TypeError as e:
14 frames
TypeError: Can not convert a float64 into a Tensor or Operation.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/ in __init__(self, fetches, contraction_fn)
305 raise TypeError('Fetch argument %r has invalid type %r, '
306 'must be a string or Tensor. (%s)' %
--> 307 (fetch, type(fetch), str(e)))
308 except ValueError as e:
309 raise ValueError('Fetch argument %r cannot be interpreted as a '
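The exception arises because the assignment reuses the name f1: after the first evaluation, f1 no longer refers to the F1 tensor but to the fetched float64 value, so the next session run receives a float instead of a Tensor. Use a different name on the left-hand side of this line: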
test_value, precision_value, recall_value, fpr_value, tpr_value,f1 =[test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
change the line to
test_value, precision_value, recall_value, fpr_value, tpr_value,f1_value =[test_accuracy, precision, recall, fpr, tpr,f1],feed_dict={handle: test_string})
Hopefully, this will work.

No variation in accuracy and loss for the CNN?

I tried to classify images from 45 classes with 700 images each using a simple two-layer CNN with batch size 252, 30 epochs, learning rate 0.0001, and image size 256 by 256 by 3. I tried increasing as well as decreasing the learning rate. The data set was split in the ratio 0.8:0.1:0.1 for training:testing:validation. However, the accuracy and the loss remain unchanged, and the loss is always zero. This is the architecture:
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags =
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')'target_image_height', 256, 'train input image height')'target_image_width', 256, 'train input image width')'batch_size', 252, 'batch size of training.')'num_epochs', 30, 'epochs of training.')'learning_rate', 0.0001, 'learning rate of training.')
img_size = 256
datapath_train = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//train//None_train_00000-of-00001.tfrecord'
datapath_validation = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//validation//None_validation_00000-of-00001.tfrecord'
datapath_test = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//test//None_test_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
parsed_example = tf.parse_single_example(tfrecord, features)
image_de =['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
# Pipeline of dataset and iterator
dataset =
# Parse the record into tensors.
dataset =
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
except tf.errors.OutOfRangeError:
#Layer 1
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
#Layer 2
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
    """Build a stride-1, SAME-padded 2-D convolution with a bias term.

    Args:
        input: tensor fed to ``tf.nn.conv2d``.
        num_input_channels: number of channels in ``input``.
        filter_size: width and height of the (square) filters.
        num_filters: number of filters, i.e. output channels.
        name: variable scope under which the layer is created.

    Returns:
        A ``(layer, weights)`` tuple: the convolution output with biases
        added, and the filter-weight variable.
    """
    with tf.variable_scope(name):
        # Shape of the filter-weights for the convolution
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        # Create new weights (filters) with the given shape
        weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
        # Create new biases, one for each filter
        biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
        # TensorFlow operation for convolution
        layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
        # Add the biases to the results of the convolution.
        layer += biases
        return layer, weights
def new_pool_layer(input, name):
    """Apply 2x2 max pooling with stride 2 and SAME padding.

    Args:
        input: tensor fed to ``tf.nn.max_pool``.
        name: variable scope under which the op is created.

    Returns:
        The pooled tensor.
    """
    with tf.variable_scope(name):
        # TensorFlow operation for max pooling
        layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        return layer
def new_relu_layer(input, name):
    """Apply the ReLU activation to ``input`` inside the scope ``name``.

    Args:
        input: tensor fed to ``tf.nn.relu``.
        name: variable scope under which the op is created.

    Returns:
        The activated tensor.
    """
    with tf.variable_scope(name):
        # TensorFlow operation for the ReLU activation
        layer = tf.nn.relu(input)
        return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
    """Build a fully connected layer computing ``input @ weights + biases``.

    No activation is applied.

    Args:
        input: 2-D tensor whose second dimension is ``num_inputs``.
        num_inputs: number of input features.
        num_outputs: number of output units.
        name: variable scope under which the layer is created.

    Returns:
        The layer's output tensor.
    """
    with tf.variable_scope(name):
        # Create new weights and biases.
        weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
        biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
        # Multiply the input and weights, and then add the bias-values.
        layer = tf.matmul(input, weights) + biases
        return layer
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
layer_conv2, weights_conv2 = new_conv_layer(input=layer_pool1, num_input_channels=32, filter_size=3, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_pool2, [-1, num_features])
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension = 1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_pred)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = FLAGS.learning_rate).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train =
dataset_validation =
dataset_test =
# Parse the record into tensors.
dataset_train =
dataset_validation =
dataset_test =
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
dataset_validation = dataset_validation.batch(FLAGS.batch_size)
iterator_validation = dataset_validation.make_initializable_iterator()
next_element_validation = iterator_validation.get_next()
dataset_test = dataset_test.batch(FLAGS.batch_size)
iterator_test = dataset_test.make_initializable_iterator()
next_element_test = iterator_test.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer = tf.summary.FileWriter("Training_FileWriter/")
# create a summary for our cost and accuracy
train_cost_summary = tf.summary.scalar("train_cost", cost)
train_acc_summary = tf.summary.scalar("train_accuracy", accuracy)
test_cost_summary = tf.summary.scalar("test_cost", cost)
test_acc_summary = tf.summary.scalar("test_accuracy", accuracy)"""
with tf.Session() as sess:
# Add the model graph to TensorBoard
# Loop over number of epochs
for epoch in range(FLAGS.num_epochs):
start_time = time.time()
"""train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0"""
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train =
#_, loss_train, acc_train, _train_cost_summary, _train_acc_summary =[optimizer, cost, accuracy, train_cost_summary, train_acc_summary], feed_dict = {x: img_train, y: lbl_train})
_, loss_train, acc_train =[optimizer, cost, accuracy], feed_dict = {x: img_train, y: lbl_train})
#writer.add_summary(_train_cost_summary, epoch +1)
#writer.add_summary(_train_acc_summary, epoch +1)
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
The output after training is as shown below:
Epoch 1 completed : Time usage 122 seconds
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 2 completed : Time usage 120 seconds
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 3 completed : Time usage 120 seconds
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 4 completed : Time usage 120 seconds
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
The model is not learning. I have inspected the code several times and the logic seems to be OK. What could be the reason that these values stay constant even after changing the learning rate and the number of epochs? I have also tried generating several datasets.
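You have made a mistake in cross_entropy: the labels you pass are y_pred, which is just the softmax of the logits, so you are comparing the output with itself instead of with the true labels.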
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_pred)
Try this
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
# y_actual should be one-hot labeled vector
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer_fc2, labels = y_actual)

Implement TFlearn imdb lstm example by tensorflow

I'm reimplementing tflearn's LSTM IMDB example in plain TensorFlow.
I used the same dataset, architecture and hyper-parameters (embedding size, maximum sentence length and so on) as the tflearn model does, but my model performs worse than the tflearn example (after 10 epochs, my model reached about 52% accuracy while the example reached nearly 80%).
I'd appreciate it a lot if you could give me some advice on how to match the performance of the example.
Below is my code:
import tensorflow as tf
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
from tensorflow.contrib.rnn import BasicLSTMCell
import time
n_class = 2
n_words = 10000
lr = 1e-3
epoch = 10
TRAIN_SIZE = 22500
validation_size = 2500
batch_size = 128
KP = 0.8
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=n_words,
valid_portion=0.1, sort_by_len=False)
trainX, trainY = train
validationX, validationY = test
testX, testY = _
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=MAX_LENGTH, value=0.)
validationX = pad_sequences(validationX, maxlen=MAX_LENGTH, value=0.)
testX = pad_sequences(testX, maxlen=MAX_LENGTH, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, n_class)
validationY = to_categorical(validationY, n_class)
testY = to_categorical(testY, n_class)
graph = tf.Graph()
with graph.as_default():
# input
text = tf.placeholder(tf.int32, [None, MAX_LENGTH])
labels = tf.placeholder(tf.float32, [None, n_class])
keep_prob = tf.placeholder(tf.float32)
embeddings_var = tf.Variable(tf.truncated_normal([n_words, EMBEDDING_SIZE]), trainable=True)
text_embedded = tf.nn.embedding_lookup(embeddings_var, text)
print(text_embedded.shape) # [batch_size, length, embedding_size]
word_list = tf.unstack(text_embedded, axis=1)
cell = BasicLSTMCell(HIDDEN_SIZE)
dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
outputs, encoding = tf.nn.static_rnn(dropout_cell, word_list, dtype=tf.float32)
logits = tf.layers.dense(outputs[-1], n_class, activation=None)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
prediction = tf.argmax(logits, 1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(labels, 1)), tf.float32))
train_steps = epoch * TRAIN_SIZE // batch_size + 1
print("Train steps: ", train_steps)
with tf.Session(graph=graph) as sess:
s = time.time()
offset = 0
for step in range(train_steps):
offset = (offset * step) % (TRAIN_SIZE - batch_size)
batch_text = trainX[offset: offset + batch_size, :]
batch_label = trainY[offset: offset + batch_size, :]
fd = {text: batch_text, labels: batch_label, keep_prob: KP}
_, l, acc =[optimizer, loss, accuracy], feed_dict=fd)
if step % 100 == 0:
print("Step: %d loss: %f accuracy: %f" % (step, l, acc))
if step % 500 == 0:
v_l, v_acc =[loss, accuracy], feed_dict={
text: validationX,
labels: validationY,
keep_prob: 1.0
print("Validation: step: %d loss: %f accuracy: %f" % (step, v_l, v_acc))
print("Training finished, time consumed:", time.time() - s, " s")
print("Test accuracy: %f" % accuracy.eval(feed_dict={
text: testX,
labels: testY,
keep_prob: 1.0
Sorry, I made a stupid mistake!
The loss :
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
is supposed to be
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
With that change, the accuracy matches that of the tflearn example.

Poker Hand dataset in Tensor flow accuracy very bad

I am trying to train a neural network on the Poker Hand dataset (10 classes). I adapted the MNIST example to fit this dataset. However, the accuracy of my program always stays at about 50%, which is frustrating. How can I improve the accuracy?
def init_weights(shape):
    """Create a variable of the given shape, initialized from a normal distribution.

    Args:
        shape: shape of the variable to create.

    Returns:
        A ``tf.Variable`` whose initial value is drawn by
        ``tf.random_normal`` with standard deviation 0.1.
    """
    weights = tf.random_normal(shape, stddev=0.1)
    return tf.Variable(weights)
def forwardprop(X, weights, biases):
    """Forward propagation through one sigmoid hidden layer and a linear output.

    IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.

    Args:
        X: input tensor, multiplied by ``weights['w_1']``.
        weights: dict with keys ``'w_1'`` and ``'w_2'``.
        biases: dict with keys ``'b_1'`` and ``'b_2'``.

    Returns:
        The unnormalized output scores (logits) ``yhat``.
    """
    # Hidden layer: sigmoid(X @ w_1 + b_1)
    h = tf.nn.sigmoid(tf.add(tf.matmul(X, weights['w_1']), biases['b_1']))
    # Output layer: h @ w_2 + b_2, left linear on purpose (see docstring)
    yhat = tf.add(tf.matmul(h, weights['w_2']), biases['b_2'])
    return yhat
def get_data(filename, targetname="target", idname="", test_size=0.10, random_state=200):
    """Load a CSV file and split it into train/test features and one-hot targets.

    Args:
        filename: path of the CSV file to read.
        targetname: name of the column holding the class label.
        idname: name of an identifier column to drop; empty to keep all
            columns.
        test_size: forwarded to ``train_test_split``.
        random_state: forwarded to ``train_test_split``.

    Returns:
        The result of ``train_test_split(all_X, all_Y, ...)``, which the
        caller unpacks as ``train_X, test_X, train_y, test_y``. ``all_X``
        is the dummy-encoded feature matrix and ``all_Y`` holds one one-hot
        row per example.
    """
    # read data from csv
    df = pd.read_csv(filename)
    if idname:
        df = df.drop(columns=idname)
    # Every column except the target is a feature. ``.loc`` and
    # ``.to_numpy()`` replace ``.ix`` / ``.as_matrix()``, which were
    # removed from pandas.
    data = df.loc[:, df.columns != targetname]
    data = pd.get_dummies(data)
    all_X = data.to_numpy()
    # Map the labels to consecutive integer codes
    target = pd.factorize(df[targetname])[0]
    # Convert target into one-hot vectors
    num_labels = len(np.unique(target))
    all_Y = np.eye(num_labels)[target]  # row i of the identity is the one-hot vector for code i
    return train_test_split(all_X, all_Y, test_size=test_size, random_state=random_state)
def main():
start_time = time.time()
train_X, test_X, train_y, test_y = get_data(filename = './data/poker-train.csv', targetname = "class")
#customized for this dataset (or any large dataset), must be chosen as per the data, need to find some generic way
#for small datasets: batch size can be 1 (for more accuracy),
#for large ones: somewhr around 50-80, if taken 1 very slow,50-80 is a trade off of accuracy for time
learning_rate = 0.01
training_epochs = 100
batch_size = 1
# Layer's sizes
x_size = train_X.shape[1] # Number of input nodes
h_size = train_X.shape[1] # Number of hidden nodes
y_size = train_y.shape[1] # Number of outcomes
# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
# Weight initializations
weights = {
'w_1' : init_weights((x_size, h_size)),
'w_2' : init_weights((h_size, y_size))
# Bias initializations
biases = {
'b_1': init_weights([h_size]),
'b_2': init_weights([y_size])
# Forward propagation
yhat = forwardprop(X, weights, biases)
predict = tf.argmax(yhat, axis=1)
# Backward propagation
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=yhat))
updates = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Run SGD
sess = tf.Session()
init = tf.global_variables_initializer()
total_batch = int(train_X.shape[0]/batch_size)
# Launch the graph
with tf.Session() as sess:
for epoch in range(training_epochs):
# Loop over all batches
for i in range(total_batch):
end_i = beg_i + batch_size
if(end_i > train_X.shape[0]):
end_i = train_X.shape[0]
batch_x, batch_y = train_X[beg_i:end_i,:],train_y[beg_i:end_i,:]
beg_i = beg_i + batch_size, feed_dict={X: batch_x, y: batch_y})
train_accuracy = np.mean(np.argmax(train_y, axis=1) ==, feed_dict={X: train_X, y: train_y}))
test_accuracy = np.mean(np.argmax(test_y, axis=1) ==, feed_dict={X: test_X, y: test_y}))
print("Epoch = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
% (epoch + 1, 100. * train_accuracy, 100. * test_accuracy))
# # Test model
# correct_prediction = tf.equal(tf.argmax(predict, 1), tf.argmax(y, 1))
# # Calculate accuracy
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# print( "Accuracy:", accuracy.eval({X: test_X, y: test_y}))
print("Total time of execution: ",time.time()-start_time)
if __name__ == '__main__':
Output is
Epoch = 100, train accuracy = 55.77%, test accuracy = 55.30%
Epoch = 1, train accuracy = 50.13%, test accuracy = 50.20%
batch_size = 50#1
training_epochs = int(train_X.shape[0]/batch_size)
# Layer's sizes
x_size = train_X.shape[1] # Number of input nodes
h_size = 100#train_X.shape[1] # Number of hidden nodes
y_size = train_y.shape[1] # Number of outcomes
I modified the settings as shown above.
Epoch = 1, train accuracy = 49.98%, test accuracy = 50.11%
Epoch = 500, train accuracy = 90.90%, test accuracy = 90.78%